{ "metadata": { "name": "Combined MLM Evaluation Suite", "version": "1.0.0", "description": "Combined suite from 18 source files.", "total_test_cases": 546, "test_type_counts": { "doctrinal_association": 221, "canonical_knowledge": 138, "contrastive_theology": 187 }, "category_counts": { "christology": 84, "pneumatology": 42, "eschatology": 36, "soteriology": 109, "theology_proper": 46, "ecclesiology": 40, "hamartiology": 34, "canonical_knowledge": 138, "spiritual_warfare": 2, "kingdom_theology": 1, "bibliology": 6, "romans_road": 8 } }, "test_cases": [ { "id": "DOC_001", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "Paul teaches that the message of the [MASK] is foolishness to those who are perishing, but to us who are being saved it is the power of God.", "targets": [ "cross" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "church", "gospel", "law", "bible", "world" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Corinthians 1:18", "reasoning": "Paul's theologia crucis hinges on the word 'cross' as the specific content of the kerygma. 'Gospel' is too generic \u2014 it does not capture Paul's deliberate provocation that the cross itself is the offensive content. 
A model predicting 'cross' shows it has encoded the Pauline association between the instrument of execution and the power of God, which is the central paradox of 1 Corinthians 1\u20132.", "surface_confounder": "gospel" }, { "id": "DOC_002", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "We preach Christ [MASK], a stumbling block to Jews and folly to Gentiles.", "targets": [ "crucified" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "risen", "glorified", "seated", "born", "coming" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Corinthians 1:23", "reasoning": "This tests whether the model associates 'we preach Christ' with 'crucified' rather than 'risen' or 'glorified.' Paul's rhetorical emphasis in chapters 1\u20132 is specifically on the scandal of crucifixion, not the triumph of resurrection. A model predicting 'risen' here has conflated the distinct emphases of 1 Corinthians 1\u20132 and 1 Corinthians 15.", "surface_confounder": "risen" }, { "id": "DOC_003", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "medium", "input": "Do you not know that your body is a [MASK] of the Holy Spirit within you, whom you have from God?", "targets": [ "temple" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "vessel", "product", "creation", "gift", "house" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Corinthians 6:19", "reasoning": "Paul's temple metaphor for the individual believer's body is distinct from the corporate temple image in 1 Corinthians 3:16. 'Temple' is the precise theological term that connects the believer's body to the sacred space of God's dwelling. 
'Vessel' (a common Christian colloquialism) is a failure because it lacks the consecration and holiness connotations that Paul's naos language carries.", "surface_confounder": "vessel" }, { "id": "DOC_004", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "For no one can lay a [MASK] other than that which is laid, which is Jesus Christ.", "targets": [ "foundation" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "stone", "path", "claim", "burden", "law" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Corinthians 3:11", "reasoning": "Paul uses architectural metaphor to describe Christ as the foundation of the church. 'Stone' is a plausible confounder drawn from Petrine imagery (1 Peter 2:6), but Paul's specific term is 'foundation' (themelios). Predicting 'foundation' correctly demonstrates the model has encoded Paul's building metaphor in its context-specific sense, distinguishing it from the stone/cornerstone tradition.", "surface_confounder": "stone" }, { "id": "DOC_005", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "easy", "input": "Now there are varieties of [MASK], but the same Spirit.", "targets": [ "gifts" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "people", "churches", "laws", "gods", "works" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "1 Corinthians 12:4", "reasoning": "This is a high-frequency verse in pneumatological discourse. The co-occurrence of 'varieties' and 'same Spirit' strongly cues 'gifts' (charismata). 
Failure to predict 'gifts' in top-10 would indicate the model lacks even basic encoding of the spiritual gifts discourse that dominates 1 Corinthians 12\u201314.", "surface_confounder": "" }, { "id": "DOC_006", "type": "doctrinal_association", "category": "eschatology", "difficulty": "medium", "input": "The last enemy to be [MASK] is death.", "targets": [ "destroyed" ], "foils": [], "acceptable_alternatives": [ "abolished" ], "failure_examples": [ "feared", "named", "embraced", "released", "overcome" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Corinthians 15:26", "reasoning": "Paul's eschatology frames death as an active enemy that Christ will definitively 'destroy' (katargeo), not merely 'overcome' in an experiential sense. The verb carries the force of abolition and nullification. 'Overcome' is softer and misses the decisive finality of Paul's claim. A model predicting 'destroyed' has learned the aggressive, triumphalist register of Pauline eschatology.", "surface_confounder": "overcome" }, { "id": "DOC_007", "type": "doctrinal_association", "category": "soteriology", "difficulty": "easy", "input": "But by the [MASK] of God I am what I am, and his grace toward me was not in vain.", "targets": [ "grace" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "power", "law", "name", "will", "hand" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "1 Corinthians 15:10", "reasoning": "This verse uses 'grace' twice, making the association very strong. The phrase 'I am what I am' followed by 'his grace toward me' creates a redundant context signal. Even a model with minimal theological training should predict 'grace' in top-10. 
Failure here indicates severe underexposure to Pauline epistles.", "surface_confounder": "" }, { "id": "DOC_008", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "medium", "input": "God is [MASK], and he will not let you be tempted beyond your ability.", "targets": [ "faithful" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "merciful", "angry", "great", "just", "good" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Corinthians 10:13", "reasoning": "Paul grounds the promise of limited temptation in God's faithfulness, not his mercy, power, or goodness. 'Faithful' (pistos) is the specific divine attribute invoked \u2014 it means God's covenant reliability, his refusal to abandon those in trial. A model predicting 'merciful' or 'good' has learned generic divine attributes but not the precise attribute Paul appeals to in this pastoral assurance.", "surface_confounder": "merciful" }, { "id": "DOC_009", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "medium", "input": "For as often as you eat this bread and drink the cup, you proclaim the Lord's [MASK] until he comes.", "targets": [ "death" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "name", "glory", "love", "kingdom", "return" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Corinthians 11:26", "reasoning": "The Lord's Supper is a proclamation of Christ's death, not a general celebration of his name or glory. 'Death' is theologically precise \u2014 it connects the ordinance to the atonement. 
A model predicting 'name' or 'glory' reveals it has learned generic worship language but not the specific eucharistic theology Paul articulates: the supper is a memorial of sacrificial death with eschatological orientation ('until he comes').", "surface_confounder": "name" }, { "id": "DOC_010", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "hard", "input": "If anyone [MASK] God's temple, God will destroy him, for God's temple is holy.", "targets": [ "destroys" ], "foils": [], "acceptable_alternatives": [ "defiles" ], "failure_examples": [ "enters", "builds", "leaves", "finds", "seeks" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "1 Corinthians 3:17", "reasoning": "Paul uses a lex talionis structure: the one who destroys God's temple will himself be destroyed. The verb phtheiro carries connotations of corruption and ruin. The hard difficulty reflects the narrow top-3 window: 'defiles' (the KJV rendering, which also echoes other temple-purity contexts) is equally plausible and is therefore accepted as an alternative, but the model must still activate vocabulary of ruin or corruption rather than neutral verbs. 'Enters' would be a critical failure indicating the model has no sense of the temple-destruction warning.", "surface_confounder": "" }, { "id": "DOC_011", "type": "doctrinal_association", "category": "eschatology", "difficulty": "hard", "input": "Each one's work will become manifest, for the Day will disclose it, because it will be revealed by [MASK].", "targets": [ "fire" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "god", "light", "angels", "truth", "time" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "1 Corinthians 3:13", "reasoning": "Paul's judgment-by-fire metaphor in 1 Corinthians 3:13\u201315 is specific: works are tested by fire, not merely disclosed by light or revealed by God directly. 'Fire' is the agent of eschatological testing. A model predicting 'light' has learned a generic revelation metaphor but not Paul's specific metallurgical/refining image. 
'God' as a prediction would miss that Paul attributes the testing to fire as the instrumental means.", "surface_confounder": "light" }, { "id": "DOC_012", "type": "doctrinal_association", "category": "soteriology", "difficulty": "hard", "input": "You were [MASK] with a price; do not become bondservants of men.", "targets": [ "bought" ], "foils": [], "acceptable_alternatives": [ "purchased" ], "failure_examples": [ "saved", "blessed", "chosen", "loved", "called" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "1 Corinthians 7:23", "reasoning": "Paul's redemption language here uses the marketplace metaphor of purchase (agorazo). 'Bought' is more precise than 'saved' because it invokes the transactional dimension of atonement theology \u2014 Christ paid a price. 'Saved' would indicate the model has generic soteriological awareness but has not encoded the specific commercial metaphor Paul employs twice in this letter (6:20, 7:23).", "surface_confounder": "saved" }, { "id": "CAN_001", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "For the foolishness of God is [MASK] than men, and the weakness of God is stronger than men.", "targets": [ "wiser" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "greater", "better", "higher", "older", "purer" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "1 Corinthians 1:25", "reasoning": "The parallel structure 'foolishness\u2026wiser / weakness\u2026stronger' makes 'wiser' highly predictable from the antithetical pattern. This tests basic canonical recall of one of the most quoted verses in 1 Corinthians. 
'Greater' or 'better' as top predictions would indicate the model has learned comparative structures but not the specific wisdom-foolishness antithesis Paul constructs.", "surface_confounder": "" }, { "id": "CAN_002", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Love bears all things, [MASK] all things, hopes all things, endures all things.", "targets": [ "believes" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "conquers", "heals", "knows", "sees", "changes" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Corinthians 13:7", "reasoning": "The four-fold pattern 'bears\u2026believes\u2026hopes\u2026endures' is a specific Pauline sequence. 'Believes' is the second element and must be recalled precisely. The surrounding terms constrain the prediction but do not uniquely determine it \u2014 'conquers' or 'overcomes' might fit syntactically but are absent from the canonical text. Medium difficulty because the fourfold list is well-known but not as universally memorized as 13:4.", "surface_confounder": "conquers" }, { "id": "CAN_003", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "For now we see in a mirror [MASK], but then face to face.", "targets": [ "dimly" ], "foils": [], "acceptable_alternatives": [ "darkly" ], "failure_examples": [ "clearly", "brightly", "briefly", "barely", "partly" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Corinthians 13:12", "reasoning": "ESV renders ainigmati as 'dimly'; KJV uses 'darkly' (through a glass, darkly). The model must predict the specific adverb that captures the indirectness of present knowledge. 'Clearly' would be a critical inversion failure. 
'Partly' is theologically adjacent (Paul says 'I know in part' in the same verse) but is the wrong canonical word for this slot.", "surface_confounder": "clearly" }, { "id": "CAN_004", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "When I was a child, I [MASK] like a child, I thought like a child, I reasoned like a child.", "targets": [ "spoke" ], "foils": [], "acceptable_alternatives": [ "talked" ], "failure_examples": [ "played", "cried", "lived", "ate", "sang" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "1 Corinthians 13:11", "reasoning": "The triad 'spoke\u2026thought\u2026reasoned' is Paul's progression from speech to cognition. 'Spoke' is the canonical first element. 'Played' or 'cried' would indicate the model has generic child-association patterns but no recall of this specific verse's structure. Easy difficulty because the 'like a child' repetition and the well-known status of this passage provide strong context.", "surface_confounder": "" }, { "id": "CAN_005", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "So now [MASK], hope, and love abide, these three; but the greatest of these is love.", "targets": [ "faith" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "grace", "peace", "truth", "joy", "mercy" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Corinthians 13:13", "reasoning": "The triad faith-hope-love is among the most recognizable Pauline formulas. Masking the first element tests whether the model can reconstruct the triad from 'hope, and love abide.' 'Grace' is a plausible Pauline term but does not belong in this specific triad. 
Medium difficulty because the triad is well-known but the first position is slightly harder to predict than the climactic third.", "surface_confounder": "grace" }, { "id": "CAN_006", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "O death, where is your [MASK]? O death, where is your sting?", "targets": [ "victory" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "power", "hold", "fear", "throne", "curse" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "1 Corinthians 15:55", "reasoning": "Paul's triumphal taunt over death pairs 'victory' and 'sting' in parallel rhetorical questions. The parallelism with 'sting' constrains the prediction. This is a high-frequency verse in funeral liturgy and Easter preaching. Failure to predict 'victory' in top-10 indicates very low exposure to Pauline resurrection discourse.", "surface_confounder": "" }, { "id": "CAN_007", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Be [MASK], stand firm in the faith, act like men, be strong.", "targets": [ "watchful" ], "foils": [], "acceptable_alternatives": [ "alert" ], "failure_examples": [ "humble", "faithful", "holy", "joyful", "patient" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Corinthians 16:13", "reasoning": "Paul's closing exhortation uses a sequence of imperatives: watchful, firm, courageous, strong. 'Watchful' (gregoreo) is the specific first command. 'Humble' or 'faithful' are generic Christian virtues that would fit syntactically but are not the canonical word. 
The imperative sequence is less frequently memorized than the love chapter, making this medium difficulty.", "surface_confounder": "humble" }, { "id": "CAN_008", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Do not be deceived: bad company [MASK] good morals.", "targets": [ "ruins" ], "foils": [], "acceptable_alternatives": [ "corrupts" ], "failure_examples": [ "builds", "tests", "reveals", "needs", "lacks" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Corinthians 15:33", "reasoning": "Paul quotes the Greek poet Menander in a proverbial warning. ESV uses 'ruins' while other translations use 'corrupts.' The sentence structure is terse and proverbial, giving minimal context beyond the subject-verb-object pattern. A model must have encoded this specific proverb. 'Builds' as a prediction would be a polarity inversion failure.", "surface_confounder": "builds" }, { "id": "CON_001", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "Paul teaches that believers are justified freely by God's [MASK], not by human merit or works of the law.", "targets": [ "grace" ], "foils": [ "law" ], "acceptable_alternatives": [], "failure_examples": [ "power", "will", "plan", "nature", "wrath" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 3:24", "reasoning": "The grace-law antithesis is fundamental to Pauline soteriology. The sentence structure 'by God's [MASK], not by\u2026works of the law' creates a contrastive frame. The foil 'law' tests whether the model confuses the instrument of justification (grace) with the standard that condemns (law). A model ranking 'law' above 'grace' has inverted the Pauline soteriological polarity. 
Hard difficulty because the foil word appears later in the same sentence, creating a lexical priming effect.", "surface_confounder": "" }, { "id": "CON_002", "type": "contrastive_theology", "category": "pneumatology", "difficulty": "medium", "input": "For in one [MASK] we were all baptized into one body, whether Jews or Greeks, slaves or free.", "targets": [ "spirit" ], "foils": [ "water" ], "acceptable_alternatives": [], "failure_examples": [ "church", "name", "place", "moment", "act" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 Corinthians 12:13", "reasoning": "Paul describes Spirit baptism as the agent of incorporation into the body of Christ. The foil 'water' tests whether the model conflates Spirit baptism with water baptism \u2014 a common theological confusion that collapses the distinction between the sign (water) and the reality (Spirit). A model that ranks 'spirit' above 'water' has encoded the pneumatological emphasis of 1 Corinthians 12.", "surface_confounder": "" }, { "id": "CON_003", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "For as in Adam all die, so also in [MASK] shall all be made alive.", "targets": [ "christ" ], "foils": [ "god" ], "acceptable_alternatives": [], "failure_examples": [ "heaven", "time", "faith", "hope", "death" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 Corinthians 15:22", "reasoning": "Paul's Adam-Christ typology is precise: the two federal heads are Adam and Christ, not Adam and God. The foil 'god' is theologically broader and technically true (all are made alive in God), but Paul's argument requires the specific christological parallel \u2014 the second Adam undoes what the first Adam did. 
A model ranking 'god' above 'christ' has failed to learn the typological structure of 1 Corinthians 15:21\u201322.", "surface_confounder": "" }, { "id": "CON_004", "type": "contrastive_theology", "category": "ecclesiology", "difficulty": "medium", "input": "Paul commands the church to [MASK] the immoral brother from their fellowship, for a little leaven leavens the whole lump.", "targets": [ "remove" ], "foils": [ "forgive" ], "acceptable_alternatives": [ "purge", "expel" ], "failure_examples": [ "welcome", "honor", "praise", "teach", "baptize" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 Corinthians 5:6-7", "reasoning": "Paul's church discipline passage in chapter 5 commands removal, not premature forgiveness. The foil 'forgive' represents the error of prioritizing individual restoration over corporate holiness \u2014 a mistake the Corinthians were making. The leaven metaphor reinforces the urgency of removal. A model ranking 'forgive' above 'remove' has not encoded the discipline logic of 1 Corinthians 5.", "surface_confounder": "" }, { "id": "CON_005", "type": "contrastive_theology", "category": "ecclesiology", "difficulty": "medium", "input": "When taking the Lord's Supper in an unworthy manner, a person eats and drinks [MASK] upon himself.", "targets": [ "judgment" ], "foils": [ "blessing" ], "acceptable_alternatives": [ "condemnation" ], "failure_examples": [ "grace", "peace", "life", "wine", "bread" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 Corinthians 11:29", "reasoning": "Paul warns that improper participation in the Lord's Supper brings judgment (krima), not blessing. The foil 'blessing' inverts the warning \u2014 a model predicting 'blessing' over 'judgment' has failed to encode the sacramental danger Paul describes. 
This is medium difficulty because 'judgment' and 'blessing' are both religiously loaded terms that commonly co-occur with eucharistic vocabulary.", "surface_confounder": "" }, { "id": "CON_006", "type": "contrastive_theology", "category": "pneumatology", "difficulty": "medium", "input": "To each is given the manifestation of the Spirit for the common [MASK].", "targets": [ "good" ], "foils": [ "glory" ], "acceptable_alternatives": [ "benefit" ], "failure_examples": [ "man", "cause", "goal", "church", "use" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 Corinthians 12:7", "reasoning": "Paul teaches that spiritual gifts are given for the common good (sympheron), not for common glory. The foil 'glory' represents the self-aggrandizing use of gifts that Paul combats throughout chapters 12\u201314 \u2014 the Corinthians were using gifts for personal display. A model ranking 'good' above 'glory' shows it has encoded the other-oriented purpose of charismata rather than the honor-culture distortion.", "surface_confounder": "" }, { "id": "CON_007", "type": "contrastive_theology", "category": "ecclesiology", "difficulty": "medium", "input": "Do not be deceived: neither the sexually immoral, nor idolaters, nor adulterers will [MASK] the kingdom of God.", "targets": [ "inherit" ], "foils": [ "enter" ], "acceptable_alternatives": [], "failure_examples": [ "see", "build", "want", "find", "reach" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 Corinthians 6:9-10", "reasoning": "Paul uses 'inherit' (kleronomeo), not 'enter' (eiserchomai), which is the synoptic gospel term. Inheritance language is covenantal \u2014 it implies a promised possession received by right of adoption, not mere access. 
The foil 'enter' tests whether the model has learned Paul's distinct covenantal vocabulary or defaults to the more common synoptic phrasing of kingdom access.", "surface_confounder": "" }, { "id": "CON_008", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "You were washed, you were [MASK], you were justified in the name of the Lord Jesus Christ and by the Spirit of our God.", "targets": [ "sanctified" ], "foils": [ "saved" ], "acceptable_alternatives": [], "failure_examples": [ "baptized", "chosen", "loved", "healed", "called" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 Corinthians 6:11", "reasoning": "Paul's triad is washed-sanctified-justified, not washed-saved-justified. The foil 'saved' is generically correct but theologically imprecise \u2014 it collapses the ordo salutis distinctions Paul carefully maintains. 'Sanctified' (hagiazo) denotes being set apart for God, which is distinct from the forensic declaration of justification that follows. A model predicting 'sanctified' over 'saved' has encoded fine-grained Pauline soteriological vocabulary.", "surface_confounder": "saved" }, { "id": "CON_009", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "For I decided to know nothing among you except Jesus Christ and him [MASK].", "targets": [ "crucified" ], "foils": [ "risen" ], "acceptable_alternatives": [], "failure_examples": [ "alone", "glorified", "seated", "exalted", "coming" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 Corinthians 2:2", "reasoning": "Paul's programmatic statement in 2:2 focuses on the crucifixion, not the resurrection. The foil 'risen' is theologically true of Christ but is not what Paul decided to 'know' among the Corinthians \u2014 his emphasis in chapters 1\u20132 is the scandal of the cross. The resurrection discourse comes later in chapter 15. 
A model ranking 'crucified' above 'risen' demonstrates sensitivity to Paul's thematic structure within the letter.", "surface_confounder": "" }, { "id": "CON_010", "type": "contrastive_theology", "category": "ecclesiology", "difficulty": "easy", "input": "If I speak in the tongues of men and of angels, but have not [MASK], I am a noisy gong or a clanging cymbal.", "targets": [ "love" ], "foils": [ "faith" ], "acceptable_alternatives": [], "failure_examples": [ "power", "wisdom", "peace", "hope", "grace" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 Corinthians 13:1", "reasoning": "Paul's love chapter opens with the supremacy of love over all spiritual gifts. The foil 'faith' is a plausible Pauline virtue and is even mentioned later in 13:2, but the governing theme of the entire chapter is love (agape). Easy difficulty because 1 Corinthians 13:1 is among the most memorized verses in the New Testament and the love-gifts contrast is unambiguous.", "surface_confounder": "" }, { "id": "CAN_009", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "Therefore, if anyone is in Christ, he is a new [MASK].", "targets": [ "creation" ], "foils": [], "acceptable_alternatives": [ "creature" ], "failure_examples": [ "person", "man", "being", "thing", "believer" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "2 Corinthians 5:17", "reasoning": "2 Corinthians 5:17 is among the most memorized Pauline verses. 'Creation' is the ESV/NASB term reflecting kain\u0113 ktisis \u2014 a cosmological category, not merely personal renewal. A model predicting 'person' or 'man' has learned generic context but missed Paul's deliberate choice of creation-language that ties regeneration to God's creative act. 
'Creature' is acceptable as the KJV rendering.", "surface_confounder": "person" }, { "id": "CAN_010", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "For we walk by [MASK], not by sight.", "targets": [ "faith" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "hope", "grace", "love", "night", "feeling" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "2 Corinthians 5:7", "reasoning": "This is a high-frequency verse with a strong lexical contrast pair (faith/sight). The opposition structure 'by X, not by Y' combined with 'sight' should make 'faith' nearly deterministic. A model predicting 'hope' shows it has learned a theological-virtue domain but not the specific Pauline formula. 'Night' would indicate purely surface-level antonym reasoning.", "surface_confounder": "hope" }, { "id": "CAN_011", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "My [MASK] is sufficient for you, for my power is made perfect in weakness.", "targets": [ "grace" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "love", "strength", "word", "will", "spirit" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "2 Corinthians 12:9", "reasoning": "This verse from Christ's reply to Paul about the thorn in the flesh is a cornerstone text on divine sufficiency. 'Grace' is the only theologically precise term \u2014 it is the unmerited divine provision that contrasts with human weakness. 
'Strength' would invert the logic of the passage, which explicitly redefines divine power as operative through human frailty, not replacing it.", "surface_confounder": "strength" }, { "id": "CAN_012", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Each one must give as he has decided in his heart, not reluctantly or under compulsion, for God loves a [MASK] giver.", "targets": [ "cheerful" ], "foils": [], "acceptable_alternatives": [ "generous" ], "failure_examples": [ "faithful", "great", "good", "willing", "humble" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "2 Corinthians 9:7", "reasoning": "The phrase 'cheerful giver' (hilaros in Greek, source of English 'hilarious') is a widely quoted phrase from Paul's teaching on generosity. The surrounding context of 'not reluctantly' should activate the specific attitudinal term. 'Generous' is acceptable as a reasonable alternative. A model predicting 'faithful' or 'good' has learned religious-positive collocations but not this specific canonical formula.", "surface_confounder": "faithful" }, { "id": "CAN_013", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Now the Lord is the Spirit, and where the Spirit of the Lord is, there is [MASK].", "targets": [ "freedom" ], "foils": [], "acceptable_alternatives": [ "liberty" ], "failure_examples": [ "peace", "love", "power", "life", "light" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "2 Corinthians 3:17", "reasoning": "Paul's identification of the Spirit with freedom occurs in the context of contrasting the veiled old covenant with the unveiled new covenant. 'Freedom' (eleutheria) is the specific canonical term. 'Peace' or 'love' are generic positive-valence theological terms that would indicate the model has not encoded this specific Pauline pneumatological claim. 
'Liberty' is acceptable as a translation variant.", "surface_confounder": "peace" }, { "id": "CAN_014", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "But we have this [MASK] in jars of clay, to show that the surpassing power belongs to God and not to us.", "targets": [ "treasure" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "gift", "light", "spirit", "hope", "power" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "2 Corinthians 4:7", "reasoning": "Paul's 'treasure in jars of clay' metaphor is a central image in his theology of apostolic suffering. The juxtaposition with 'jars of clay' requires a word denoting something precious \u2014 'treasure' is the exact canonical term. 'Gift' or 'light' are theologically adjacent but miss the specific metaphorical economy Paul establishes between fragile vessels and surpassing value.", "surface_confounder": "gift" }, { "id": "DOC_013", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "medium", "input": "Paul opens 2 Corinthians by blessing the God of all [MASK], who comforts us in all our affliction.", "targets": [ "comfort" ], "foils": [], "acceptable_alternatives": [ "consolation" ], "failure_examples": [ "power", "wisdom", "glory", "mercy", "creation" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "2 Corinthians 1:3-4", "reasoning": "The opening doxology of 2 Corinthians establishes God as the source of parakl\u0113sis (comfort/consolation). The repeated cognate pattern \u2014 'God of all comfort who comforts' \u2014 is distinctive to this letter's theology of suffering. 
A model predicting 'power' or 'wisdom' has defaulted to generic divine attributes rather than learning this letter's specific thematic vocabulary.", "surface_confounder": "mercy" }, { "id": "DOC_014", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "Paul declares that for our sake God made the sinless Christ to be [MASK] on our behalf, so that we might become the righteousness of God.", "targets": [ "sin" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "holy", "pure", "flesh", "human", "cursed" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "2 Corinthians 5:21", "reasoning": "This is Paul's great exchange formula \u2014 Christ is made sin, believers become righteousness. The paradox of a sinless being 'made sin' is theologically precise and resists substitution. 'Cursed' approximates Galatians 3:13 but misses the specific 2 Corinthians formulation. A model predicting 'holy' or 'pure' has inverted the doctrinal logic entirely, failing to grasp the substitutionary exchange.", "surface_confounder": "cursed" }, { "id": "DOC_015", "type": "doctrinal_association", "category": "christology", "difficulty": "hard", "input": "Paul's paradox in 2 Corinthians is that divine [MASK] is made perfect in human weakness.", "targets": [ "power" ], "foils": [], "acceptable_alternatives": [ "strength" ], "failure_examples": [ "love", "wisdom", "mercy", "will", "glory" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "2 Corinthians 12:9", "reasoning": "The power-in-weakness paradox is the theological spine of 2 Corinthians. 'Power' (dynamis) is the specific term Paul uses \u2014 not a generic divine attribute like love or mercy. 
A model that predicts 'love' in this frame has learned that God-plus-weakness contexts are positive but has not encoded the specific Pauline power/weakness dialectic that defines apostolic ministry in this letter.", "surface_confounder": "love" }, { "id": "DOC_016", "type": "doctrinal_association", "category": "soteriology", "difficulty": "hard", "input": "Paul describes his apostolic role as carrying the message of reconciliation, as though God were making his [MASK] through the apostles.", "targets": [ "appeal" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "power", "will", "love", "wrath", "glory" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "2 Corinthians 5:20", "reasoning": "In 2 Corinthians 5:20 Paul says God is making his appeal (parakalountos) through apostolic agents. 'Appeal' is distinctly Pauline \u2014 it frames the gospel as divine entreaty, not coercion. A model predicting 'power' or 'will' misses the ambassadorial register that characterizes Paul's reconciliation theology. The word also tests whether the model links the apostolic 'ambassador' role with persuasion rather than authority.", "surface_confounder": "power" }, { "id": "DOC_017", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "medium", "input": "The new covenant is a ministry of the [MASK], not of the letter that kills.", "targets": [ "spirit" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "law", "word", "lord", "flesh", "truth" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "2 Corinthians 3:6", "reasoning": "Paul's letter/Spirit contrast in 2 Corinthians 3 is foundational to new covenant pneumatology. The opposition 'letter that kills' creates a strong constraint requiring the life-giving counterpart. 'Spirit' is the only correct term. 
A model predicting 'law' confuses the Pauline antithesis \u2014 the letter is associated with the law, but the Spirit is its opposite, not the law restated.", "surface_confounder": "word" }, { "id": "DOC_018", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "Paul teaches that though Christ was [MASK], yet for our sake he became poor, so that by his poverty we might become rich.", "targets": [ "rich" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "holy", "divine", "perfect", "pure", "strong" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "2 Corinthians 8:9", "reasoning": "Paul's rich-poor exchange in 2 Corinthians 8:9 parallels the sin-righteousness exchange of 5:21. The sentence structure \u2014 'though X, he became Y, so that we might become Z' \u2014 combined with 'poor' and 'rich' later in the sentence should constrain the mask to 'rich'. A model predicting 'holy' or 'divine' has defaulted to generic Christological attributes rather than the specific economic metaphor Paul uses to motivate Corinthian generosity.", "surface_confounder": "holy" }, { "id": "DOC_019", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "medium", "input": "Paul describes believers as the temple of the living [MASK], who dwells among his people and walks with them.", "targets": [ "god" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "spirit", "lord", "christ", "word", "king" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "2 Corinthians 6:16", "reasoning": "In 2 Corinthians 6:16, Paul applies the Old Testament temple-indwelling language to the church. The phrase 'living God' is the specific canonical formula \u2014 not 'living Spirit' or 'living Lord'. 'Spirit' is a strong confounder because 1 Corinthians 6:19 uses temple-of-the-Holy-Spirit language, but here Paul deliberately uses 'God' to echo Leviticus 26:12. 
Correct prediction shows the model distinguishes between Paul's varying temple formulations.", "surface_confounder": "spirit" }, { "id": "DOC_020", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "Beholding the [MASK] of the Lord, believers are being transformed into the same image from one degree to another.", "targets": [ "glory" ], "foils": [], "acceptable_alternatives": [ "face" ], "failure_examples": [ "love", "truth", "word", "cross", "light" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "2 Corinthians 3:18", "reasoning": "In 2 Corinthians 3:18, Paul describes progressive transformation as mediated by beholding the Lord's glory \u2014 the unveiled radiance that surpasses Moses's fading glory. 'Glory' (doxa) is the thread running through the entire new-covenant argument of chapter 3. 'Face' is an acceptable alternative reflecting the immediate context of unveiled faces. A model predicting 'love' or 'truth' has missed the glory-theology that structures this entire passage.", "surface_confounder": "love" }, { "id": "DOC_021", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "hard", "input": "The god of this [MASK] has blinded the minds of unbelievers to keep them from seeing the light of the gospel.", "targets": [ "world" ], "foils": [], "acceptable_alternatives": [ "age" ], "failure_examples": [ "life", "earth", "time", "place", "realm" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "2 Corinthians 4:4", "reasoning": "Paul's designation of Satan as 'the god of this world/age' (tou ai\u014dnos toutou) is a distinctive hamartiological claim about the present evil order. 'World' (ESV) and 'age' (NASB footnote) are both legitimate translations. 'Earth' is a surface confounder \u2014 it matches the spatial sense but misses the Pauline temporal-cosmological category. 
A model predicting 'life' or 'time' shows it has not encoded Satan's role as ruler of the present evil age.", "surface_confounder": "earth" }, { "id": "DOC_022", "type": "doctrinal_association", "category": "soteriology", "difficulty": "easy", "input": "For the [MASK] of Christ controls us, because we have concluded that one has died for all.", "targets": [ "love" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "word", "law", "fear", "wrath", "power" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "2 Corinthians 5:14", "reasoning": "Paul grounds apostolic motivation in the love of Christ (agap\u0113 tou Christou) \u2014 specifically Christ's sacrificial love demonstrated in his death for all. 'Love' in conjunction with 'controls us' and 'died for all' should be strongly activated. A model predicting 'fear' or 'wrath' has inverted the motivational logic from grace to coercion. 'Law' would indicate confusion between Pauline gospel-motivation and legalism.", "surface_confounder": "fear" }, { "id": "CON_011", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "Christ was rich yet became poor, so that through his poverty believers might become [MASK].", "targets": [ "rich" ], "foils": [ "worthy" ], "acceptable_alternatives": [], "failure_examples": [ "holy", "saved", "blessed", "pure", "free" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Corinthians 8:9", "reasoning": "Paul's rich-poor exchange maintains strict parallelism: Christ's riches become poverty, and believers' poverty becomes riches. The foil 'worthy' tests whether the model has learned a moralistic gloss (believers earn something) versus the actual exchange logic (believers receive what Christ had). 
A model ranking 'worthy' above 'rich' has imported a merit framework foreign to Paul's grace theology here.", "surface_confounder": "worthy" }, { "id": "CON_012", "type": "contrastive_theology", "category": "christology", "difficulty": "medium", "input": "Paul learned to boast in being [MASK] rather than strong, because God's power is made perfect there.", "targets": [ "weak" ], "foils": [ "humble" ], "acceptable_alternatives": [], "failure_examples": [ "small", "quiet", "broken", "poor", "low" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Corinthians 12:9-10", "reasoning": "Paul's boast in weakness (astheneia) is not a general virtue of humility but a specific theological claim about where divine power operates. The foil 'humble' tests whether the model conflates Pauline weakness-theology with generic piety. Humility is a moral virtue; weakness is an existential condition. Paul does not choose weakness as an act of humility \u2014 he receives it as a site where God's power is displayed.", "surface_confounder": "humble" }, { "id": "CON_013", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "Paul says the ministry of the new covenant gives [MASK], while the old covenant ministry brought condemnation.", "targets": [ "life" ], "foils": [ "law" ], "acceptable_alternatives": [ "righteousness" ], "failure_examples": [ "hope", "peace", "rest", "joy", "truth" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Corinthians 3:6-9", "reasoning": "In 2 Corinthians 3, Paul contrasts the ministries of death/condemnation and Spirit/life (and righteousness in v. 9). The foil 'law' tests whether the model confuses the content of the old covenant with the outcome of the new. The new covenant does not give 'law' \u2014 that would be the old covenant's content. 'Life' is the correct Pauline contrast to condemnation. 
'Righteousness' is acceptable per 3:9.", "surface_confounder": "law" }, { "id": "CON_014", "type": "contrastive_theology", "category": "pneumatology", "difficulty": "hard", "input": "The letter of the law kills, but the [MASK] gives life.", "targets": [ "spirit" ], "foils": [ "grace" ], "acceptable_alternatives": [], "failure_examples": [ "gospel", "lord", "word", "truth", "blood" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Corinthians 3:6", "reasoning": "Paul's antithesis in 2 Corinthians 3:6 is between gramma (letter) and pneuma (Spirit), not between law and grace. The foil 'grace' tests whether the model has conflated the Pauline letter/Spirit contrast with the broader law/grace framework more prominent in Romans and Galatians. While grace is theologically related, Paul's specific point here is pneumatological \u2014 the Spirit is the agent of new-covenant life, not an abstract principle of grace.", "surface_confounder": "grace" }, { "id": "CON_015", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "We are being transformed from one degree of [MASK] to another by the Lord who is the Spirit.", "targets": [ "glory" ], "foils": [ "grace" ], "acceptable_alternatives": [], "failure_examples": [ "faith", "love", "truth", "power", "light" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Corinthians 3:18", "reasoning": "Paul's 'glory to glory' (apo dox\u0113s eis doxan) formula describes progressive transformation in the image of Christ. The foil 'grace' tests whether the model defaults to the most common Pauline positive-valence term rather than the specific glory-theology of 2 Corinthians 3. 
'Grace upon grace' is a Johannine phrase (John 1:16), not Pauline \u2014 a model ranking 'grace' above 'glory' here has confused its Pauline and Johannine vocabularies.", "surface_confounder": "grace" }, { "id": "CON_016", "type": "contrastive_theology", "category": "hamartiology", "difficulty": "medium", "input": "Satan disguises himself as an angel of [MASK] to deceive believers.", "targets": [ "light" ], "foils": [ "glory" ], "acceptable_alternatives": [], "failure_examples": [ "death", "god", "peace", "heaven", "mercy" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Corinthians 11:14", "reasoning": "Paul warns that Satan masquerades as an angel of light (ph\u014ds), not glory (doxa). The foil 'glory' tests a subtle distinction \u2014 angelic beings are associated with both light and glory in biblical literature, but Paul's specific term here is 'light', emphasizing deceptive appearance. A model predicting 'glory' has generalized angelic-radiance language without encoding the specific Pauline warning about Satanic mimicry.", "surface_confounder": "glory" }, { "id": "CON_017", "type": "contrastive_theology", "category": "ecclesiology", "difficulty": "hard", "input": "Paul warns the Corinthians not to be unequally [MASK] with unbelievers.", "targets": [ "yoked" ], "foils": [ "joined" ], "acceptable_alternatives": [ "bound" ], "failure_examples": [ "mixed", "paired", "linked", "grouped", "matched" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Corinthians 6:14", "reasoning": "Paul's 'unequally yoked' (heterozygountes) draws on the Deuteronomic prohibition against yoking different animals together (Deut 22:10). The foil 'joined' tests whether the model has encoded the specific agricultural metaphor or generalized to a generic relational term. 
'Yoked' carries covenantal weight absent from 'joined' \u2014 it implies a structural binding under a common harness, not mere association.", "surface_confounder": "joined" }, { "id": "CON_018", "type": "contrastive_theology", "category": "christology", "difficulty": "medium", "input": "Paul's thorn in the flesh served to keep him [MASK], preventing self-exaltation after his surpassing revelations.", "targets": [ "humble" ], "foils": [ "weak" ], "acceptable_alternatives": [], "failure_examples": [ "silent", "small", "afraid", "broken", "careful" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Corinthians 12:7", "reasoning": "The thorn's stated purpose (hina m\u0113 hyperair\u014dmai \u2014 'to keep me from becoming conceited') is anti-pride, i.e. to maintain humility. The foil 'weak' tests whether the model confuses the thorn's purpose (humility) with its effect (weakness). Paul's weakness is the medium through which God works, but the thorn's explicit function is to prevent self-exaltation. A model predicting 'weak' over 'humble' has conflated mechanism with purpose.", "surface_confounder": "weak" }, { "id": "CON_019", "type": "contrastive_theology", "category": "ecclesiology", "difficulty": "hard", "input": "Paul insists that the Corinthians' contribution should be given as a willing [MASK], not as an obligation extracted under pressure.", "targets": [ "gift" ], "foils": [ "duty" ], "acceptable_alternatives": [ "offering" ], "failure_examples": [ "tax", "payment", "tithe", "sacrifice", "tribute" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Corinthians 9:5", "reasoning": "Paul uses eulogia (blessing/gift) in 2 Corinthians 9:5 to describe the collection for Jerusalem, explicitly contrasting it with pleonexia (covetous exaction). The foil 'duty' tests whether the model imports an obligatory framework into Paul's voluntary-gift theology. 
Paul's entire argument in chapters 8-9 grounds generosity in grace and willing hearts, not moral obligation. A model ranking 'duty' above 'gift' has missed the grace-motivation Paul carefully constructs.", "surface_confounder": "duty" }, { "id": "DOC_023", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "easy", "input": "When the day of Pentecost arrived, the disciples were filled with the Holy [MASK] and began to speak in other tongues.", "targets": [ "spirit" ], "foils": [], "acceptable_alternatives": [ "ghost" ], "failure_examples": [ "fire", "water", "light", "wind", "power" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Acts 2:4", "reasoning": "The Pentecost narrative is the signature event of Acts. 'Holy ___' is one of the strongest collocations in biblical training data. A model that fails to predict 'spirit' in top-10 has not internalized even the most basic pneumatological vocabulary. 'Ghost' is acceptable as a KJV variant.", "surface_confounder": "" }, { "id": "DOC_024", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "medium", "input": "But you will receive [MASK] when the Holy Spirit has come upon you, and you will be my witnesses.", "targets": [ "power" ], "foils": [], "acceptable_alternatives": [ "strength" ], "failure_examples": [ "wisdom", "money", "knowledge", "peace", "rest" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Acts 1:8", "reasoning": "Acts 1:8 is the programmatic thesis of the entire book. The Spirit's coming is linked specifically to 'power' (dynamis) for witness, not to wisdom or knowledge. 
A model that predicts 'wisdom' reveals it has collapsed Lukan pneumatology into a generic spiritual-gift schema rather than learning the Acts-specific power-for-mission framework.", "surface_confounder": "wisdom" }, { "id": "DOC_025", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "Peter said to them, [MASK] and be baptized every one of you in the name of Jesus Christ for the forgiveness of your sins.", "targets": [ "repent" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "believe", "come", "pray", "listen", "confess" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Acts 2:38", "reasoning": "In the Petrine kerygma, repentance is the specific demand preceding baptism. 'Believe' is soteriologically adjacent but misses the Lukan emphasis on repentance as the entry command in apostolic preaching. A model predicting 'believe' over 'repent' has likely over-indexed on Pauline soteriology at the expense of Petrine usage in Acts.", "surface_confounder": "believe" }, { "id": "DOC_026", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "There is salvation in no one else, for there is no other [MASK] under heaven given among men by which we must be saved.", "targets": [ "name" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "god", "way", "power", "law", "path" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Acts 4:12", "reasoning": "Acts 4:12 links salvation exclusively to the 'name' of Jesus \u2014 a distinctive Lukan category. 'Name' in Acts carries performative and representational force beyond a mere label. 
A model predicting 'way' has imported Johannine vocabulary; 'power' reveals a generic theological guess rather than recall of this specific exclusivist formula.", "surface_confounder": "way" }, { "id": "DOC_027", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "easy", "input": "They devoted themselves to the apostles' teaching and the fellowship, to the breaking of [MASK] and the prayers.", "targets": [ "bread" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "ground", "silence", "fast", "water", "chains" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Acts 2:42", "reasoning": "Acts 2:42 defines the four marks of the primitive church. 'Breaking of bread' is a fixed phrase denoting either the Lord's Supper or communal meals. The collocation is extremely strong and should be trivially predictable. Failure here indicates the model lacks basic biblical idiom exposure.", "surface_confounder": "" }, { "id": "DOC_028", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "Believe in the Lord Jesus, and you will be [MASK], you and your household.", "targets": [ "saved" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "blessed", "healed", "forgiven", "chosen", "free" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Acts 16:31", "reasoning": "Paul and Silas's reply to the Philippian jailer is one of the clearest salvation formulas in Acts. The direct link between 'believe' and 'saved' is the defining Pauline pattern. 
A model predicting 'blessed' or 'healed' has failed to distinguish soteriological language from general benefaction vocabulary.", "surface_confounder": "blessed" }, { "id": "DOC_029", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "hard", "input": "And Peter said to them, you will receive the [MASK] of the Holy Spirit after repentance and baptism.", "targets": [ "gift" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "power", "fire", "voice", "word", "law" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Acts 2:38", "reasoning": "The 'gift of the Holy Spirit' is a distinctive Lukan phrase connecting Spirit reception to the conversion-initiation sequence. 'Power' is a plausible pneumatological term but belongs to the Acts 1:8 mission context, not the Acts 2:38 soteriological context. Correctly predicting 'gift' demonstrates the model has learned context-sensitive pneumatological vocabulary rather than collapsing all Spirit-language into a single cluster.", "surface_confounder": "power" }, { "id": "DOC_030", "type": "doctrinal_association", "category": "eschatology", "difficulty": "medium", "input": "This Jesus, who was taken up from you into heaven, will [MASK] in the same way as you saw him go into heaven.", "targets": [ "come" ], "foils": [], "acceptable_alternatives": [ "return" ], "failure_examples": [ "speak", "judge", "reign", "appear", "shine" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Acts 1:11", "reasoning": "The angelic promise at the Ascension establishes the parousia expectation in Acts. 'Come' is the exact canonical term. A model predicting 'judge' has jumped ahead to a secondary eschatological function rather than the primary promise of return. 
This tests whether the model has absorbed the Ascension-return sequence as Luke frames it.", "surface_confounder": "judge" }, { "id": "DOC_031", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "hard", "input": "The God who made the world and everything in it, being Lord of heaven and earth, does not live in [MASK] made by man.", "targets": [ "temples" ], "foils": [], "acceptable_alternatives": [ "houses", "shrines" ], "failure_examples": [ "cities", "nations", "bodies", "lands", "hearts" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Acts 17:24", "reasoning": "Paul's Areopagus speech asserts divine transcendence against pagan temple religion. 'Temples' is the specific target \u2014 the polemic is against localized deity worship. A model predicting 'cities' or 'nations' shows it has lost the cultic specificity of the argument. This tests whether the model has learned Paul's natural theology vocabulary as deployed in Acts 17.", "surface_confounder": "places" }, { "id": "DOC_032", "type": "doctrinal_association", "category": "soteriology", "difficulty": "hard", "input": "We believe that we will be saved through the [MASK] of the Lord Jesus, just as they will.", "targets": [ "grace" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "law", "blood", "death", "name", "word" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Acts 15:11", "reasoning": "Peter's statement at the Jerusalem Council is the hinge point for Gentile inclusion. 'Grace' is the operative soteriological term that settles the law-vs-grace debate in Acts 15. A model predicting 'blood' or 'death' has imported atonement vocabulary from Hebrews or Romans rather than recognizing the specific grace-over-law argument Peter makes. 
'Law' as a prediction would indicate inverted doctrinal polarity.", "surface_confounder": "blood" }, { "id": "DOC_033", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "medium", "input": "And they were all [MASK] with the Holy Spirit and began to speak in other tongues as the Spirit gave them utterance.", "targets": [ "filled" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "touched", "struck", "moved", "blessed", "covered" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Acts 2:4", "reasoning": "Being 'filled' with the Spirit is the characteristic Lukan descriptor for Spirit-empowerment throughout Acts. This specific collocation appears repeatedly (2:4, 4:8, 4:31, 9:17, 13:9). A model that predicts 'touched' or 'moved' has generic religious language but not the distinctive Lukan vocabulary. The high frequency of this collocation in Acts makes it a good medium-difficulty test.", "surface_confounder": "touched" }, { "id": "DOC_034", "type": "doctrinal_association", "category": "christology", "difficulty": "hard", "input": "God has made him both Lord and [MASK], this Jesus whom you crucified.", "targets": [ "christ" ], "foils": [], "acceptable_alternatives": [ "messiah" ], "failure_examples": [ "king", "judge", "priest", "master", "savior" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Acts 2:36", "reasoning": "The climax of Peter's Pentecost sermon declares Jesus as Lord and Christ \u2014 the messianic title, not a generic honorific. 'King' is christologically adjacent but misses the specific messianic claim. A model predicting 'savior' has imported a different christological title that lacks the Davidic-enthronement context of Acts 2:36. 
Correct prediction shows the model has learned the Lord-Christ doublet as a fixed pair.", "surface_confounder": "king" }, { "id": "DOC_035", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "easy", "input": "And the Lord added to their number day by day those who were being [MASK].", "targets": [ "saved" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "healed", "taught", "baptized", "chosen", "called" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Acts 2:47", "reasoning": "Acts 2:47 concludes the summary of the early church's communal life. 'Saved' is the canonical term and connects ecclesiology to soteriology \u2014 the church grows through salvation. 'Baptized' is a plausible process word but not the term Luke uses. This is easy because the soteriological verb is a strong collocation with 'added to their number.'", "surface_confounder": "baptized" }, { "id": "DOC_036", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "medium", "input": "In him we live and move and have our [MASK], as even some of your own poets have said.", "targets": [ "being" ], "foils": [], "acceptable_alternatives": [ "existence" ], "failure_examples": [ "hope", "faith", "strength", "peace", "rest" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Acts 17:28", "reasoning": "In his Areopagus speech, Paul quotes a pagan poet to establish God's immanence. The triad 'live, move, being' is a fixed expression. 'Being' is philosophically specific \u2014 it signals ontological dependence. 
A model predicting 'hope' or 'faith' has defaulted to generic religious vocabulary rather than the philosophical register Paul adopts in Athens.", "surface_confounder": "hope" }, { "id": "CAN_015", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "But you will receive power when the Holy Spirit has come upon you, and you will be my [MASK] in Jerusalem and in all Judea and Samaria.", "targets": [ "witnesses" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "servants", "prophets", "followers", "priests", "soldiers" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Acts 1:8", "reasoning": "Acts 1:8 uses 'witnesses' as the defining role of Spirit-empowered disciples. This is a high-frequency verse and the term is embedded in a strong geographical expansion pattern (Jerusalem, Judea, Samaria, ends of the earth). Failure to recall 'witnesses' in top-10 indicates severe under-exposure to Acts.", "surface_confounder": "" }, { "id": "CAN_016", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "And when he had said these things, as they were looking on, he was lifted up, and a [MASK] took him out of their sight.", "targets": [ "cloud" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "light", "wind", "fire", "voice", "angel" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Acts 1:9", "reasoning": "The Ascension narrative uses 'cloud' as the vehicle of Jesus' departure \u2014 an echo of the Danielic Son of Man cloud theophany. A model predicting 'light' or 'fire' has confused the Ascension with the Transfiguration or Pentecost imagery. 
Correct recall demonstrates the model distinguishes between distinct theophanic motifs in Luke-Acts.", "surface_confounder": "light" }, { "id": "CAN_017", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "And there appeared to them tongues as of [MASK] distributing themselves, and they rested on each one of them.", "targets": [ "fire" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "light", "water", "wind", "gold", "blood" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Acts 2:3", "reasoning": "The Pentecost theophany includes tongues 'as of fire.' 'Fire' is the canonical term describing the visible manifestation of the Spirit's arrival. A model predicting 'light' has a generically theophanic representation but not the specific Pentecost imagery. This is medium because the 'tongues as of ___' collocation requires precise canonical recall.", "surface_confounder": "light" }, { "id": "CAN_018", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "But Peter said, I have no silver and gold, but what I do have I give to you. In the [MASK] of Jesus Christ of Nazareth, rise up and walk!", "targets": [ "name" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "power", "spirit", "love", "blood", "word" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Acts 3:6", "reasoning": "Peter's healing formula at the Beautiful Gate invokes the 'name' of Jesus \u2014 a key Acts motif linking christological authority to miraculous power. 'Power' is contextually plausible but is not the word Peter uses. 
Correct prediction at k=3 shows the model has internalized the 'in the name of Jesus' formula that pervades Acts' miracle narratives.", "surface_confounder": "power" }, { "id": "CAN_019", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "And [MASK], full of grace and power, was doing great wonders and signs among the people.", "targets": [ "stephen" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "peter", "paul", "john", "james", "philip" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Acts 6:8", "reasoning": "Stephen is introduced as the first named figure 'full of grace and power' who performs wonders. The specific pairing of deacon ministry with miracle-working marks Stephen's unique role before his martyrdom speech. A model confusing Stephen with Peter or Paul reveals it has not learned the character-specific narrative arcs in Acts.", "surface_confounder": "" }, { "id": "CAN_020", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "And falling to his knees he cried out with a loud voice, Lord, do not hold this [MASK] against them.", "targets": [ "sin" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "act", "word", "crime", "deed", "wrong" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Acts 7:60", "reasoning": "Stephen's dying prayer parallels Jesus' words from the cross. 'Sin' is the canonical ESV/NASB term. A model predicting 'act' or 'crime' has a legal rather than theological register for this context. 
Correct prediction shows the model has absorbed the specific hamartiological vocabulary used in Stephen's martyrdom.", "surface_confounder": "act" }, { "id": "CAN_021", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "And Saul, still breathing threats and [MASK] against the disciples of the Lord, went to the high priest.", "targets": [ "murder" ], "foils": [], "acceptable_alternatives": [ "slaughter" ], "failure_examples": [ "anger", "hatred", "lies", "curses", "violence" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Acts 9:1", "reasoning": "Luke uses 'threats and murder' to characterize pre-conversion Saul's hostility. The pairing is jarring and specific \u2014 not 'threats and anger' or 'threats and violence.' Correct recall at k=3 demonstrates the model has encoded the precise lexical signature of Saul's introduction, which sets up the dramatic reversal of his conversion.", "surface_confounder": "violence" }, { "id": "CAN_022", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "For David, after he had served the purpose of God in his own [MASK], fell asleep and was laid with his fathers.", "targets": [ "generation" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "time", "life", "day", "age", "way" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Acts 13:36", "reasoning": "Paul's sermon in Pisidian Antioch uses 'generation' as the specific term for David's temporal service, contrasting David's mortality with Jesus' resurrection. 'Time' or 'life' are near-synonyms but 'generation' is the canonical ESV word. This tests whether the model has encoded Paul's precise vocabulary in his synagogue sermons.", "surface_confounder": "time" }, { "id": "CAN_023", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "And he said, Who are you, [MASK]? 
And he said, I am Jesus, whom you are persecuting.", "targets": [ "lord" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "god", "sir", "master", "spirit", "king" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Acts 9:5", "reasoning": "Saul's address to the voice on the Damascus road is 'Lord' \u2014 the canonical term in all three tellings of his conversion. The word functions as both a respectful address and an ironic christological confession. This is easy due to the high frequency of the Damascus road account in biblical corpora.", "surface_confounder": "" }, { "id": "CON_020", "type": "contrastive_theology", "category": "pneumatology", "difficulty": "hard", "input": "The Holy Spirit was [MASK] out on the Gentiles at the house of Cornelius just as on the Jewish believers at Pentecost.", "targets": [ "poured" ], "foils": [ "earned" ], "acceptable_alternatives": [], "failure_examples": [ "forced", "pulled", "taken", "bought", "traded" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Acts 10:45", "reasoning": "The Spirit is 'poured out' (echoing Joel 2) \u2014 language of unmerited bestowal. The foil 'earned' represents a works-based pneumatology where Spirit reception is conditioned on human merit. A model that ranks 'earned' above 'poured' has inverted the grace-based framework of Acts' Spirit theology. 
'Poured' beating 'earned' confirms the model has learned the gift character of the Spirit in Acts.", "surface_confounder": "given" }, { "id": "CON_021", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "For it has seemed good to the Holy Spirit and to us to lay on you no greater [MASK] than these requirements.", "targets": [ "burden" ], "foils": [ "merit" ], "acceptable_alternatives": [], "failure_examples": [ "glory", "honor", "gift", "praise", "reward" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Acts 15:28", "reasoning": "The Jerusalem Council's decree minimizes requirements for Gentile believers \u2014 framing the law as a 'burden' rather than a path to merit. The foil 'merit' inverts the theological direction: instead of reducing obligation, it would imply the council is granting spiritual credit. 'Burden' beating 'merit' shows the model has learned the law-as-burden framework of Acts 15.", "surface_confounder": "" }, { "id": "CON_022", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "God [MASK] Jesus from the dead, and of that we all are witnesses.", "targets": [ "raised" ], "foils": [ "created" ], "acceptable_alternatives": [], "failure_examples": [ "called", "found", "named", "changed", "chose" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Acts 2:32", "reasoning": "The apostolic kerygma declares God 'raised' Jesus \u2014 a resurrection claim, not an origination claim. The foil 'created' introduces an Arian-adjacent error: if God 'created' Jesus, the Son is a creature rather than the eternally begotten one who was resurrected. 
'Raised' beating 'created' confirms the model has encoded the resurrection as the central christological event in Acts' preaching.", "surface_confounder": "sent" }, { "id": "CON_023", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "Through this man forgiveness of sins is proclaimed to you, and by him everyone who believes is [MASK] from everything from which you could not be freed by the law of Moses.", "targets": [ "freed" ], "foils": [ "condemned" ], "acceptable_alternatives": [ "justified" ], "failure_examples": [ "punished", "cursed", "bound", "judged", "removed" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Acts 13:38-39", "reasoning": "Paul's Antioch sermon declares that belief in Jesus 'frees' where the Mosaic law could not. The foil 'condemned' represents a complete inversion \u2014 salvation becomes damnation. A model with correct soteriological polarity will rank 'freed' above 'condemned.' This tests whether the model has learned that the gospel announcement in Acts is liberating, not condemning.", "surface_confounder": "" }, { "id": "CON_024", "type": "contrastive_theology", "category": "pneumatology", "difficulty": "hard", "input": "When Simon saw that the Spirit was given through the laying on of the apostles' hands, he offered them [MASK].", "targets": [ "money" ], "foils": [ "prayer" ], "acceptable_alternatives": [], "failure_examples": [ "praise", "thanks", "food", "help", "gifts" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Acts 8:18-19", "reasoning": "Simon Magus's sin is offering 'money' to purchase spiritual power \u2014 the origin of the term 'simony.' The foil 'prayer' represents the correct response to seeing the Spirit's work. A model ranking 'prayer' above 'money' has misread the narrative \u2014 Simon's error is precisely that he commodified the Spirit. 
'Money' beating 'prayer' shows the model has encoded the narrative's critique of transactional pneumatology.", "surface_confounder": "prayer" }, { "id": "CON_025", "type": "contrastive_theology", "category": "ecclesiology", "difficulty": "medium", "input": "Now the full number of those who believed were of one heart and soul, and no one said that any of the things that belonged to him was his own, but they had everything in [MASK].", "targets": [ "common" ], "foils": [ "secret" ], "acceptable_alternatives": [], "failure_examples": [ "order", "boxes", "parts", "pieces", "groups" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Acts 4:32", "reasoning": "The early church's communal life is defined by having things 'in common' \u2014 radical generosity. The foil 'secret' inverts the communal ethic: instead of open sharing, it implies hoarding and concealment, which is precisely the sin Ananias and Sapphira commit in Acts 5. 'Common' beating 'secret' shows the model has learned the ecclesiological ideal Luke presents.", "surface_confounder": "" }, { "id": "CON_026", "type": "contrastive_theology", "category": "pneumatology", "difficulty": "medium", "input": "But Peter said, Ananias, why has Satan filled your [MASK] to lie to the Holy Spirit and to keep back for yourself part of the proceeds?", "targets": [ "heart" ], "foils": [ "mind" ], "acceptable_alternatives": [], "failure_examples": [ "mouth", "hands", "home", "soul", "body" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Acts 5:3", "reasoning": "Peter diagnoses Ananias's sin as Satan filling his 'heart' \u2014 the seat of moral intention in biblical anthropology. The foil 'mind' is anthropologically adjacent but reflects a Greek-philosophical rather than Hebraic locus of deception. In the biblical register, self-deception and sin originate in the heart, not merely the mind. 
'Heart' beating 'mind' demonstrates the model has absorbed biblical anthropological vocabulary.", "surface_confounder": "mind" }, { "id": "CON_027", "type": "contrastive_theology", "category": "eschatology", "difficulty": "hard", "input": "He has fixed a day on which he will [MASK] the world in righteousness by a man whom he has appointed.", "targets": [ "judge" ], "foils": [ "destroy" ], "acceptable_alternatives": [], "failure_examples": [ "bless", "heal", "save", "change", "end" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Acts 17:31", "reasoning": "Paul's Areopagus sermon concludes with the coming 'judgment' \u2014 a forensic eschatological act, not annihilation. The foil 'destroy' represents an annihilationist eschatology where the world is obliterated rather than held accountable. 'Judge' beating 'destroy' shows the model has learned the forensic character of Pauline eschatology as presented in Acts 17.", "surface_confounder": "destroy" }, { "id": "CON_028", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "God exalted him at his right hand as Leader and [MASK], to give repentance to Israel and forgiveness of sins.", "targets": [ "savior" ], "foils": [ "destroyer" ], "acceptable_alternatives": [], "failure_examples": [ "warrior", "avenger", "conqueror", "ruler", "accuser" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Acts 5:31", "reasoning": "The apostolic confession exalts Jesus as 'Leader and Savior' \u2014 his exaltation is soteriological. The foil 'destroyer' inverts Christ's saving purpose into a destructive one. A model that ranks 'destroyer' above 'savior' has fundamentally misread the christological arc of Acts' preaching. 
The pairing of 'Leader and Savior' with 'repentance and forgiveness' creates a strong soteriological context.", "surface_confounder": "ruler" }, { "id": "CON_029", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "Let all the house of Israel therefore know for certain that God has made him both [MASK] and Christ, this Jesus whom you crucified.", "targets": [ "lord" ], "foils": [ "angel" ], "acceptable_alternatives": [], "failure_examples": [ "prophet", "servant", "priest", "teacher", "rabbi" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Acts 2:36", "reasoning": "Peter declares Jesus as 'Lord and Christ' \u2014 the highest christological titles. The foil 'angel' represents a subordinationist christology where Jesus is a created spiritual being rather than the exalted Lord. 'Lord' beating 'angel' demonstrates the model has internalized the Petrine confession's high christology against angelic-christology errors found in early heterodox movements.", "surface_confounder": "angel" }, { "id": "CON_030", "type": "contrastive_theology", "category": "ecclesiology", "difficulty": "medium", "input": "Pay careful attention to yourselves and to all the [MASK], in which the Holy Spirit has made you overseers, to care for the church of God.", "targets": [ "flock" ], "foils": [ "army" ], "acceptable_alternatives": [], "failure_examples": [ "land", "crowd", "world", "kingdom", "city" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Acts 20:28", "reasoning": "Paul's farewell to the Ephesian elders uses pastoral 'flock' imagery for the church. The foil 'army' introduces a militaristic ecclesiology where church leadership is command-and-control rather than shepherding. 
'Flock' beating 'army' shows the model has learned the pastoral metaphor that defines eldership in Acts 20 and 1 Peter 5.", "surface_confounder": "people" }, { "id": "CON_031", "type": "contrastive_theology", "category": "pneumatology", "difficulty": "medium", "input": "You stiff-necked people, you always [MASK] the Holy Spirit. As your fathers did, so do you.", "targets": [ "resist" ], "foils": [ "obey" ], "acceptable_alternatives": [], "failure_examples": [ "love", "praise", "welcome", "honor", "seek" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Acts 7:51", "reasoning": "Stephen's accusation charges Israel with 'resisting' the Holy Spirit \u2014 a pneumatological indictment. The foil 'obey' inverts the accusation into a commendation. A model ranking 'obey' above 'resist' has failed to encode the adversarial context of Stephen's speech. 'Resist' beating 'obey' confirms the model has learned the prosecutorial register of Acts 7.", "surface_confounder": "obey" }, { "id": "DOC_037", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "The gospel is not self-improvement or moral reform but the announcement that Christ has accomplished our [MASK] through the cross.", "targets": [ "redemption" ], "foils": [], "acceptable_alternatives": [ "salvation" ], "failure_examples": [ "healing", "success", "happiness", "purpose", "freedom" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "", "reasoning": "Chandler's central thesis contrasts the explicit gospel with moralistic therapeutic deism. The co-occurrence of 'accomplished,' 'cross,' and the negation of 'self-improvement' should activate soteriological terms like 'redemption' over therapeutic language like 'healing' or 'happiness.' 
A model predicting 'success' or 'purpose' reveals it has absorbed self-help register rather than gospel register.", "surface_confounder": "freedom" }, { "id": "DOC_038", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "hard", "input": "Man's fundamental problem is not bad behavior but a corrupt [MASK] that is hostile to God from birth.", "targets": [ "nature" ], "foils": [], "acceptable_alternatives": [ "heart" ], "failure_examples": [ "attitude", "mind", "habit", "world", "culture" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "", "reasoning": "Chandler's 'Man' chapter in the Ground section argues for total depravity rooted in fallen nature, not merely behavioral dysfunction. 'Nature' is the precise hamartiological term \u2014 'corrupt nature hostile to God' is the Augustinian/Reformed formula. 'Heart' is acceptable as a biblical near-synonym. 'Attitude' or 'habit' would indicate the model has collapsed depravity into behavioral categories, exactly the moralistic reduction Chandler opposes.", "surface_confounder": "attitude" }, { "id": "DOC_039", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "medium", "input": "God does not exist to serve human purposes; all of creation exists for the display of his [MASK].", "targets": [ "glory" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "love", "power", "plan", "goodness", "mercy" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "", "reasoning": "Chandler opens the Ground section by insisting God's glory \u2014 not human flourishing \u2014 is the teleological center of the universe. The frame 'all of creation exists for the display of' strongly constrains to 'glory' as the Reformed answer. 'Love' is a surface confounder because it is true of God but does not fit the doxological frame Chandler constructs. 
A model predicting 'plan' or 'goodness' has learned theistic vocabulary without the God-centered teleology.", "surface_confounder": "love" }, { "id": "DOC_040", "type": "doctrinal_association", "category": "eschatology", "difficulty": "medium", "input": "The gospel in the air moves from creation to fall to reconciliation and finally to [MASK], when God makes all things new.", "targets": [ "consummation" ], "foils": [], "acceptable_alternatives": [ "restoration" ], "failure_examples": [ "heaven", "rapture", "eternity", "paradise", "judgment" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "", "reasoning": "Chandler's four-movement cosmic gospel framework \u2014 Creation, Fall, Reconciliation, Consummation \u2014 is the structural spine of Part 2. 'Consummation' is the precise eschatological term completing this sequence. 'Heaven' would indicate a truncated popular eschatology rather than the new-creation theology Chandler advocates. 'Restoration' is acceptable as it captures the 'all things new' renewal. A model predicting 'rapture' has imported dispensational vocabulary foreign to Chandler's framework.", "surface_confounder": "heaven" }, { "id": "CAN_024", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "For by [MASK] you have been saved through faith, and this is not your own doing; it is the gift of God.", "targets": [ "grace" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "works", "law", "god", "christ", "mercy" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Ephesians 2:8", "reasoning": "Ephesians 2:8-9 is one of the most frequently cited passages in The Explicit Gospel, anchoring Chandler's argument that salvation is entirely a gift. 'Grace' is the canonical term and among the highest-frequency soteriological collocations in evangelical corpora. 
Failure to rank it in top-10 would indicate severe underexposure to Pauline soteriology.", "surface_confounder": "" }, { "id": "CAN_025", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "And you were dead in the trespasses and [MASK] in which you once walked.", "targets": [ "sins" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "darkness", "flesh", "ways", "shame", "pain" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ephesians 2:1-2", "reasoning": "Ephesians 2:1 is the linchpin verse for Chandler's case for total depravity \u2014 spiritual death precedes any choice. The hendiadys 'trespasses and sins' is a fixed canonical pair. A model that predicts 'darkness' or 'flesh' has learned Pauline vocabulary but not the specific collocational structure of this verse. Correct prediction shows the model has encoded canonical phrasing, not just topical domain.", "surface_confounder": "darkness" }, { "id": "CAN_026", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "He is the [MASK] of the invisible God, the firstborn of all creation.", "targets": [ "image" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "son", "word", "light", "lord", "glory" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Colossians 1:15", "reasoning": "Chandler uses the Colossians 1 hymn to establish Christ's cosmic supremacy in the 'Gospel in the Air' section. 'Image' is the canonical term from the Christ hymn. 'Son' is a strong confounder because it is true christologically but is not the word in this verse. 
Correct prediction demonstrates the model distinguishes canonical phrasing from generic christological vocabulary.", "surface_confounder": "son" }, { "id": "CON_032", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "Chandler argues that what many churches actually preach is not the gospel but moralistic therapeutic [MASK].", "targets": [ "deism" ], "foils": [ "theism" ], "acceptable_alternatives": [], "failure_examples": [ "religion", "faith", "teaching", "belief", "christianity" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "", "reasoning": "Moralistic Therapeutic Deism (Smith & Denton, adopted heavily by Chandler) is the specific sociological term for the counterfeit gospel. The foil 'theism' tests whether the model has learned the precise three-word formula or merely associates 'moralistic therapeutic' with generic God-belief. 'Deism' beating 'theism' shows the model has encoded this specific cultural-theological critique rather than defaulting to the more frequent word 'theism.' This is hard because 'theism' has much higher base frequency.", "surface_confounder": "theism" }, { "id": "CON_033", "type": "contrastive_theology", "category": "christology", "difficulty": "medium", "input": "The cross is not merely an example of sacrificial love but a [MASK] that satisfies the wrath of God against sin.", "targets": [ "sacrifice" ], "foils": [ "symbol" ], "acceptable_alternatives": [ "propitiation" ], "failure_examples": [ "lesson", "story", "gesture", "gift", "tragedy" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "", "reasoning": "Chandler insists on penal substitutionary atonement against reductions of the cross to moral influence or mere exemplarism. The foil 'symbol' represents the liberal theological position that the cross is illustrative rather than effectual. 'Sacrifice' beating 'symbol' shows the model has learned the substitutionary framework. 
'Propitiation' is an excellent alternative as it is even more precise. 'Tragedy' or 'lesson' would indicate the model has absorbed secular or moral-influence framing of the crucifixion.", "surface_confounder": "symbol" }, { "id": "CON_034", "type": "contrastive_theology", "category": "hamartiology", "difficulty": "hard", "input": "Apart from the Spirit's intervention, the natural man is not merely sick in sin but spiritually [MASK].", "targets": [ "dead" ], "foils": [ "weak" ], "acceptable_alternatives": [], "failure_examples": [ "lost", "broken", "confused", "blind", "empty" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ephesians 2:1", "reasoning": "Chandler makes the Calvinist case that unregenerate humanity is not merely impaired but dead \u2014 incapable of self-rescue. The foil 'weak' represents the Arminian or semi-Pelagian position that sin damages but does not destroy spiritual capacity. 'Dead' beating 'weak' shows the model has encoded the Reformed total-depravity distinction. The sentence structure ('not merely sick but') creates a scalar frame that should push toward the stronger term. 'Lost' and 'broken' are semi-accurate but lack the Ephesians 2:1 precision Chandler insists on.", "surface_confounder": "weak" }, { "id": "DOC_041", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "Paul writes that God saved believers not because of their works but because of his [MASK].", "targets": [ "grace" ], "foils": [], "acceptable_alternatives": [ "mercy" ], "failure_examples": [ "justice", "anger", "power", "wisdom", "law" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ephesians 2:5-8", "reasoning": "Ephesians 2:5-8 repeatedly emphasizes grace (charis) as the ground of salvation. The juxtaposition with 'not because of their works' should strongly activate 'grace' specifically. 'Mercy' is an acceptable alternative because Eph 2:4 mentions God's mercy in the same passage. 
A model predicting 'power' or 'wisdom' has learned generic divine attributes but not the soteriological grammar of Ephesians.", "surface_confounder": "love" }, { "id": "DOC_042", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "medium", "input": "Before coming to faith, Paul describes all people as spiritually [MASK] in their trespasses and sins.", "targets": [ "dead" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "lost", "weak", "sick", "broken", "trapped" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ephesians 2:1", "reasoning": "Ephesians 2:1 uses 'dead' (nekros) to describe the pre-conversion state, which is foundational to the Augustinian/Reformed doctrine of total inability. 'Lost' or 'sick' would reflect Arminian or semi-Pelagian hamartiologies that understate the depth of human depravity. The collocates 'trespasses and sins' should strongly cue 'dead' for a model trained on Pauline corpus.", "surface_confounder": "lost" }, { "id": "DOC_043", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "easy", "input": "In Ephesians, believers are described as being [MASK] with the promised Holy Spirit as a guarantee of their inheritance.", "targets": [ "sealed" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "blessed", "touched", "chosen", "covered", "gifted" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Ephesians 1:13-14", "reasoning": "The sealing (sphragizo) of the Holy Spirit in Eph 1:13 is a distinctive Pauline pneumatological concept denoting divine ownership, authenticity, and security. 'Filled' is a plausible alternative from Eph 5:18 but lacks the guarantee/inheritance semantic frame present in the sentence. 
A model predicting 'blessed' has learned the devotional register but not the specific pneumatological vocabulary.", "surface_confounder": "filled" }, { "id": "DOC_044", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "easy", "input": "Paul urges the Ephesians to maintain the [MASK] of the Spirit in the bond of peace.", "targets": [ "unity" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "power", "fire", "presence", "gift", "law" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Ephesians 4:3", "reasoning": "Unity (henotes) is the central ecclesiological imperative in Ephesians 4:1-6. The phrase 'bond of peace' directly modifies how unity is maintained. A model producing 'power' or 'gift' has generalized to charismatic vocabulary instead of learning the specific relational-ecclesiological emphasis of Ephesians 4.", "surface_confounder": "love" }, { "id": "DOC_045", "type": "doctrinal_association", "category": "christology", "difficulty": "easy", "input": "Paul declares that Christ is the [MASK] of the church, which is his body.", "targets": [ "head" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "king", "heart", "soul", "founder", "center" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Ephesians 5:23", "reasoning": "The head-body metaphor for Christ and the church is a signature Pauline christological-ecclesiological concept in Ephesians (1:22, 4:15, 5:23). 'Head' should be strongly activated by the body-church collocate. 'King' or 'lord' reflect valid but non-Ephesian christological categories. 
A model predicting 'heart' has defaulted to generic metaphorical language.", "surface_confounder": "lord" }, { "id": "DOC_046", "type": "doctrinal_association", "category": "christology", "difficulty": "hard", "input": "Paul writes that in Christ believers have redemption through his [MASK], the forgiveness of sins.", "targets": [ "blood" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "name", "word", "life", "power", "spirit" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Ephesians 1:7", "reasoning": "Ephesians 1:7 specifies 'through his blood' (dia tou haimatos autou) as the instrument of redemption. 'Death' is a theologically adjacent surface confounder that refers to the same event but misses the sacrificial-cultic register Paul employs. 'Blood' in Pauline atonement theology is not merely a metonym for death but evokes Levitical sacrifice. A model ranking 'death' above 'blood' lacks the ritual-atonement layer of Paul's soteriology.", "surface_confounder": "death" }, { "id": "DOC_047", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "medium", "input": "The church is built on the foundation of the apostles and [MASK], with Christ Jesus as the cornerstone.", "targets": [ "prophets" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "kings", "priests", "saints", "elders", "angels" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ephesians 2:20", "reasoning": "Ephesians 2:20 pairs apostles and prophets as the ecclesiological foundation. In Ephesian context, 'prophets' likely refers to New Testament prophets (see Eph 3:5, 4:11), not Old Testament figures. A model predicting 'priests' has imported Levitical categories foreign to Paul's ecclesiology. 
The apostle-prophet pairing should be a strong learned bigram for any model with adequate Pauline exposure.", "surface_confounder": "priests" }, { "id": "DOC_048", "type": "doctrinal_association", "category": "spiritual_warfare", "difficulty": "medium", "input": "Paul commands believers to put on the full [MASK] of God to stand against the schemes of the devil.", "targets": [ "armor" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "glory", "name", "spirit", "light", "presence" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ephesians 6:11", "reasoning": "The panoplia (full armor) of God in Eph 6:11 is one of the most recognizable passages in Ephesians. The military metaphor should be strongly cued by 'stand against' and 'schemes of the devil.' A model predicting 'power' or 'word' has generalized to common Pauline collocates of God without learning the distinctive martial vocabulary of Ephesians 6.", "surface_confounder": "power" }, { "id": "DOC_049", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "medium", "input": "Paul describes unbelievers as following the prince of the power of the air, the [MASK] that is now at work in the sons of disobedience.", "targets": [ "spirit" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "force", "god", "king", "demon", "law" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ephesians 2:2", "reasoning": "Ephesians 2:2 identifies the operative agent in unregenerate humanity as a 'spirit' (pneuma) \u2014 specifically the spirit associated with the prince of the power of the air (Satan). This is a subtle hamartiological claim: sin is not merely behavioral but driven by a personal spiritual force. 
A model predicting 'force' has captured the dynamic quality but lost the personal-spiritual dimension central to Pauline demonology.", "surface_confounder": "force" }, { "id": "DOC_050", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "hard", "input": "Paul prays that the Ephesians would know the immeasurable greatness of God's [MASK] toward those who believe.", "targets": [ "power" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "wrath", "wisdom", "glory", "judgment", "plan" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Ephesians 1:19", "reasoning": "Ephesians 1:19 uses a remarkable stacking of power words (dynamis, energeia, kratos, ischus) to describe God's power toward believers, linking it to the resurrection of Christ. 'Love' is the strongest surface confounder because Ephesians is saturated with love language, but this specific prayer is about power. A model predicting 'love' here has defaulted to the dominant Ephesian theme instead of parsing the immediate context of the prayer.", "surface_confounder": "love" }, { "id": "CAN_027", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "For by grace you have been saved through [MASK], and this is not your own doing; it is the gift of God.", "targets": [ "faith" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "works", "law", "merit", "obedience", "effort" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Ephesians 2:8", "reasoning": "Ephesians 2:8 is one of the highest-frequency Pauline verses in evangelical training corpora. The 'grace...through [MASK]' construction is the foundational Reformation proof-text for sola fide. 'Faith' should be top-1 for any model with meaningful exposure to Pauline or evangelical texts. 
Failure here indicates the model has not internalized even the most basic Pauline soteriology.", "surface_confounder": "" }, { "id": "CAN_028", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "For we are his workmanship, created in Christ Jesus for good [MASK], which God prepared beforehand.", "targets": [ "works" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "times", "days", "things", "deeds", "purposes" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Ephesians 2:10", "reasoning": "Ephesians 2:10 completes the grace-faith-works triad of 2:8-10. The irony of 'good works' appearing immediately after 'not of works' (2:9) is a distinctive Pauline rhetorical move. A model that has learned the canonical sequence should strongly predict 'works' in this frame. 'Deeds' is close but not the ESV/NASB canonical term.", "surface_confounder": "" }, { "id": "CAN_029", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "There is one body and one Spirit, just as you were called to the one [MASK] that belongs to your call.", "targets": [ "hope" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "lord", "god", "church", "spirit", "truth" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ephesians 4:4", "reasoning": "The seven-fold unity formula of Eph 4:4-6 (one body, one Spirit, one hope, one Lord, one faith, one baptism, one God) is a creedal sequence. 'Hope' is the third element and is cued by 'called to the one [MASK] that belongs to your call.' 
A model confusing this with 'faith' or 'lord' from later in the sequence reveals it has not learned the internal ordering of the formula.", "surface_confounder": "faith" }, { "id": "CAN_030", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Take the helmet of salvation, and the [MASK] of the Spirit, which is the word of God.", "targets": [ "sword" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "shield", "belt", "fire", "gift", "power" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ephesians 6:17", "reasoning": "The armor of God passage in Eph 6:10-17 assigns specific spiritual realities to each piece of armor. The sword (machaira) of the Spirit is uniquely identified as 'the word of God,' making it the only offensive weapon in the panoply. 'Shield' is a strong confounder from the same passage (shield of faith, v.16), but the appositive 'which is the word of God' should disambiguate for a model that has learned the armor-piece mappings.", "surface_confounder": "shield" }, { "id": "CAN_031", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "Having the eyes of your [MASK] enlightened, that you may know what is the hope to which he has called you.", "targets": [ "hearts" ], "foils": [], "acceptable_alternatives": [ "heart" ], "failure_examples": [ "mind", "soul", "eyes", "spirit", "understanding" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Ephesians 1:18", "reasoning": "The ESV reads 'eyes of your hearts,' which is a distinctive Pauline anthropological metaphor where kardia (heart) is the seat of spiritual perception. 'Mind' and 'understanding' are strong surface confounders because English readers associate enlightenment with intellect, but Paul's Greek uses kardia. 
A model that predicts 'mind' over 'hearts' has defaulted to English collocation patterns rather than biblical anthropological vocabulary.", "surface_confounder": "mind" }, { "id": "CAN_032", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "For he himself is our [MASK], who has made us both one and has broken down the dividing wall of hostility.", "targets": [ "peace" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "lord", "king", "savior", "hope", "strength" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ephesians 2:14", "reasoning": "Ephesians 2:14 makes the striking claim that Christ is not merely the bringer of peace but is himself 'our peace' (eirene). The context of Jew-Gentile reconciliation and the 'dividing wall' metaphor should activate this term. A model predicting 'lord' or 'savior' has learned generic christological titles but not the specific peace-christology of Ephesians 2.", "surface_confounder": "savior" }, { "id": "CON_035", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "Paul teaches that salvation is God's free [MASK] to sinners, not the result of human works.", "targets": [ "gift" ], "foils": [ "reward" ], "acceptable_alternatives": [], "failure_examples": [ "payment", "wage", "prize", "loan", "trade" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ephesians 2:8-9", "reasoning": "Ephesians 2:8 explicitly identifies salvation as a gift (doron). The foil 'reward' implies merit-based compensation, which directly contradicts the grace-works antithesis of 2:8-9. A model ranking 'reward' above 'gift' has conflated grace-based and works-based soteriological frameworks. 
The 'free' modifier in the sentence should reinforce the unmerited quality, but a model with prosperity-gospel or moralistic contamination may still prefer 'reward.'", "surface_confounder": "reward" }, { "id": "CON_036", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "In Ephesians, Paul teaches that believers were [MASK] by God before the creation of the world.", "targets": [ "chosen" ], "foils": [ "created" ], "acceptable_alternatives": [ "selected" ], "failure_examples": [ "made", "born", "formed", "found", "tested" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ephesians 1:4", "reasoning": "Ephesians 1:4 uses exelexato (chose/elected) to describe God's pretemporal act toward believers. The foil 'created' confuses election with creation \u2014 a category error that conflates God's sovereign choice of persons with his act of bringing them into existence. A model ranking 'created' above 'chosen' has not learned the distinction between divine election and divine creation, which is fundamental to Pauline predestination theology.", "surface_confounder": "created" }, { "id": "CON_037", "type": "contrastive_theology", "category": "ecclesiology", "difficulty": "medium", "input": "Paul describes the relationship between Christ and the church as that of a [MASK] and wife.", "targets": [ "husband" ], "foils": [ "king" ], "acceptable_alternatives": [], "failure_examples": [ "master", "father", "priest", "ruler", "judge" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ephesians 5:25-32", "reasoning": "Ephesians 5:25-32 develops the Christ-church relationship through the marital metaphor, not the royal metaphor. The foil 'king' represents a valid but contextually wrong christological category \u2014 Christ is called king elsewhere, but Ephesians 5 specifically deploys the husband-wife analogy to teach sacrificial love and covenantal intimacy. 
A model ranking 'king' above 'husband' has imported generic christological titles rather than learning the specific ecclesiological metaphor Paul constructs.", "surface_confounder": "king" }, { "id": "CON_038", "type": "contrastive_theology", "category": "hamartiology", "difficulty": "hard", "input": "Paul says that apart from Christ, people are by [MASK] children of wrath.", "targets": [ "nature" ], "foils": [ "choice" ], "acceptable_alternatives": [], "failure_examples": [ "birth", "law", "fate", "accident", "habit" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ephesians 2:3", "reasoning": "Ephesians 2:3 states 'by nature children of wrath' (physei tekna orges), indicating an inherent sinful condition, not merely sinful behavior. The foil 'choice' represents the Pelagian error that humans are sinful only by voluntary decision. A model ranking 'choice' above 'nature' has absorbed libertarian free-will anthropology over the Augustinian/Reformed doctrine of original sin that Ephesians 2:1-3 teaches. This is one of the most precise hamartiological tests in the Pauline corpus.", "surface_confounder": "choice" }, { "id": "CON_039", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "Paul teaches that Christ [MASK] far above all the heavens, that he might fill all things.", "targets": [ "ascended" ], "foils": [ "descended" ], "acceptable_alternatives": [ "rose" ], "failure_examples": [ "came", "went", "fell", "returned", "appeared" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ephesians 4:10", "reasoning": "Ephesians 4:9-10 presents a descent-then-ascent christological pattern. The foil 'descended' tests whether the model can parse the directional logic: the clause 'far above all the heavens' and the purpose 'fill all things' require the upward motion of ascension. 'Descended' appears in the immediately prior verse (4:9), making it a strong lexical confounder. 
A model ranking 'descended' above 'ascended' has failed to learn the semantic constraints of the spatial prepositions in the passage.", "surface_confounder": "descended" }, { "id": "CON_040", "type": "contrastive_theology", "category": "ecclesiology", "difficulty": "medium", "input": "In Ephesians, the dividing wall between Jew and Gentile was broken down by Christ to create one new [MASK].", "targets": [ "man" ], "foils": [ "law" ], "acceptable_alternatives": [ "humanity" ], "failure_examples": [ "temple", "race", "nation", "group", "world" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ephesians 2:15", "reasoning": "Ephesians 2:15 says Christ's purpose was to 'create in himself one new man' (hena kainon anthropon) \u2014 a unified humanity. The foil 'law' is the entity abolished (the law of commandments), not the entity created. A model ranking 'law' above 'man' has confused the means of reconciliation (abolishing the law) with the result (creating one new man). This tests whether the model has learned the causal structure of the passage, not just its vocabulary.", "surface_confounder": "law" }, { "id": "CON_041", "type": "contrastive_theology", "category": "spiritual_warfare", "difficulty": "easy", "input": "The Christian's struggle in Ephesians is not against flesh and blood but against spiritual [MASK] in the heavenly places.", "targets": [ "forces" ], "foils": [ "people" ], "acceptable_alternatives": [ "powers", "rulers" ], "failure_examples": [ "problems", "ideas", "feelings", "thoughts", "sins" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ephesians 6:12", "reasoning": "Ephesians 6:12 explicitly negates a human enemy ('not against flesh and blood') and identifies spiritual entities (archai, exousiai, kosmokratores) as the true adversary. The foil 'people' represents a naturalistic misreading that reduces spiritual warfare to interpersonal conflict. 
A model ranking 'people' above 'forces' has not internalized the supernatural cosmology of Pauline spiritual warfare.", "surface_confounder": "people" }, { "id": "CON_042", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "Paul says believers were saved by grace through faith, indicating that faith is the [MASK] through which grace is received, not the cause of salvation.", "targets": [ "means" ], "foils": [ "cause" ], "acceptable_alternatives": [ "channel", "instrument" ], "failure_examples": [ "source", "origin", "basis", "reason", "proof" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ephesians 2:8", "reasoning": "The dia + genitive construction in Ephesians 2:8 (dia pisteos) marks faith as the instrumental means, not the efficient cause, of salvation. The foil 'cause' reflects a synergistic theology that elevates faith to a meritorious act contributing to salvation, which contradicts the immediately following clause 'not of yourselves.' A model ranking 'cause' above 'means' has not learned the instrumental-causal distinction that is central to Reformation soteriology and embedded in this Pauline syntax.", "surface_confounder": "cause" }, { "id": "DOC_051", "type": "doctrinal_association", "category": "eschatology", "difficulty": "medium", "input": "Ezekiel's vision of a valley of dry bones coming to life prefigures the [MASK] of the dead in the last days.", "targets": [ "resurrection" ], "foils": [], "acceptable_alternatives": [ "raising", "rising" ], "failure_examples": [ "burial", "destruction", "sleep", "decay", "removal" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ezekiel 37:1-14", "reasoning": "The dry bones vision is one of the Old Testament's most vivid resurrection typologies. A model that predicts 'resurrection' shows it has mapped the bones-to-life narrative arc onto eschatological categories. 
Predicting 'burial' or 'destruction' would indicate inverted polarity \u2014 associating dry bones with finality rather than reversal.", "surface_confounder": "restoration" }, { "id": "DOC_052", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "In Ezekiel 36, God promises to remove the heart of [MASK] and give his people a heart of flesh.", "targets": [ "stone" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "gold", "iron", "wood", "fire", "darkness" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ezekiel 36:26", "reasoning": "The stone/flesh contrast is Ezekiel's signature image for regeneration. 'Stone' is the only word that completes the biblical metaphor \u2014 hardness and unresponsiveness to God. A model predicting 'iron' or 'gold' reveals it has learned material-contrast patterns but not the specific covenantal idiom. This tests whether the model encodes the Ezekiel 36 heart-replacement formula precisely.", "surface_confounder": "iron" }, { "id": "DOC_053", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "easy", "input": "God promises in Ezekiel to put his [MASK] within his people so they walk in his statutes and obey his rules.", "targets": [ "spirit" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "wrath", "angel", "law", "curse", "sword" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Ezekiel 36:27", "reasoning": "Ezekiel 36:27 is a foundational pneumatological text \u2014 the indwelling Spirit as the agent of obedience. The co-occurrence of 'put within' and 'walk in statutes' should strongly activate 'spirit.' 
A model predicting 'law' shows it has learned the obedience-command domain but missed the internal-agent shift that distinguishes new covenant theology from Sinai covenant.", "surface_confounder": "law" }, { "id": "DOC_054", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "medium", "input": "The departure of God's [MASK] from the temple in Ezekiel signifies that the LORD has withdrawn his presence from Israel.", "targets": [ "glory" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "angel", "servant", "prophet", "army", "fire" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ezekiel 10:18-19", "reasoning": "The kabod (glory) theology is Ezekiel's structuring motif \u2014 glory departs (chs 8\u201311), and glory returns (ch 43). 'Glory' is the technical term for God's manifest presence. A model predicting 'angel' or 'fire' shows it has learned theophany vocabulary but not the specific glory-departure-return arc that organizes the book.", "surface_confounder": "presence" }, { "id": "DOC_055", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "medium", "input": "God appoints Ezekiel to [MASK] over the house of Israel and warn the wicked of their coming judgment.", "targets": [ "watch" ], "foils": [], "acceptable_alternatives": [ "guard" ], "failure_examples": [ "reign", "rule", "destroy", "mourn", "curse" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ezekiel 33:7", "reasoning": "The watchman metaphor in Ezekiel 3 and 33 defines the prophetic office as sentinel responsibility \u2014 warn, not rule. A model predicting 'reign' or 'rule' conflates prophetic and royal offices. 
'Watch' captures the specific relational duty: vigilance and proclamation without governmental authority.", "surface_confounder": "rule" }, { "id": "DOC_056", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "easy", "input": "Ezekiel declares that the soul who sins shall [MASK], establishing individual moral accountability before God.", "targets": [ "die" ], "foils": [], "acceptable_alternatives": [ "perish" ], "failure_examples": [ "prosper", "live", "sleep", "rest", "heal" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Ezekiel 18:20", "reasoning": "Ezekiel 18:20 is one of the clearest statements of individual retribution in the Old Testament. The 'soul who sins shall die' formula is high-frequency in theological corpora. Failure to predict 'die' in top-10 would indicate the model has not internalized basic hamartiological cause-effect patterning.", "surface_confounder": "" }, { "id": "DOC_057", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "hard", "input": "In Ezekiel 11, the LORD promises to be a [MASK] to his people in the countries where they have been scattered.", "targets": [ "sanctuary" ], "foils": [], "acceptable_alternatives": [ "refuge" ], "failure_examples": [ "judge", "enemy", "burden", "stranger", "wall" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Ezekiel 11:16", "reasoning": "The 'little sanctuary' promise (Ezekiel 11:16) is theologically dense \u2014 God himself becomes the temple for exiles. This tests whether the model has encoded the temple-to-person transfer that prefigures incarnational theology. 
'Refuge' is acceptable but less precise; 'judge' or 'enemy' would reveal inverted exile theology where God is the antagonist rather than the shelter.", "surface_confounder": "shield" }, { "id": "DOC_058", "type": "doctrinal_association", "category": "eschatology", "difficulty": "medium", "input": "The river flowing from Ezekiel's visionary [MASK] brings healing and life to everything it touches.", "targets": [ "temple" ], "foils": [], "acceptable_alternatives": [ "throne" ], "failure_examples": [ "cave", "mountain", "desert", "city", "tomb" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ezekiel 47:1-12", "reasoning": "Ezekiel 47's life-giving river originates from the temple threshold. A model predicting 'temple' shows it has connected the river-of-life motif to its cultic source. 'Mountain' is a plausible geographic confounder but misses the temple-as-source-of-life theology. This image reappears in Revelation 22, so encoding the Ezekiel source matters for intertextual coherence.", "surface_confounder": "mountain" }, { "id": "DOC_059", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "God promises in Ezekiel 37 to make an everlasting covenant of [MASK] with his restored people.", "targets": [ "peace" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "war", "wrath", "silence", "debt", "blood" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ezekiel 37:26", "reasoning": "The 'covenant of peace' (berit shalom) is Ezekiel's term for the eschatological covenant. 'Peace' here is not merely cessation of conflict but total restoration of the divine-human relationship. A model predicting 'war' or 'wrath' reveals inverted covenant theology. 
'Blood' is a covenant-adjacent term but belongs to the Mosaic ratification idiom, not Ezekiel's restoration formula.", "surface_confounder": "blood" }, { "id": "DOC_060", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "hard", "input": "In Ezekiel's inaugural vision, the living creatures each had four [MASK] and four wings, reflecting the majesty of God's heavenly court.", "targets": [ "faces" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "arms", "horns", "crowns", "tails", "legs" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Ezekiel 1:6", "reasoning": "The four faces (human, lion, ox, eagle) are Ezekiel's distinctive cherubim iconography. 'Faces' is precise \u2014 not 'heads' (which would imply four separate heads rather than four faces on one head). A model predicting 'horns' or 'crowns' has learned apocalyptic creature vocabulary generically without distinguishing Ezekiel's cherubim from Daniel's or Revelation's beasts.", "surface_confounder": "heads" }, { "id": "CAN_033", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "And I will give you a new [MASK], and a new spirit I will put within you.", "targets": [ "heart" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "law", "name", "body", "sword", "song" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Ezekiel 36:26", "reasoning": "Ezekiel 36:26 is among the most-quoted Old Testament regeneration texts. 'Heart' is the canonical word in every major English translation. The parallelism 'new heart / new spirit' makes this contextually unambiguous. 
Failure in top-10 would indicate no meaningful exposure to covenantal renewal language.", "surface_confounder": "" }, { "id": "CAN_034", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "Prophesy over these [MASK], and say to them, O dry bones, hear the word of the LORD.", "targets": [ "bones" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "stones", "ashes", "ruins", "graves", "fields" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Ezekiel 37:4", "reasoning": "The command to prophesy over the dry bones is the climactic moment of Ezekiel 37. 'Bones' is immediately reinforced by the appositive 'dry bones' in the same sentence. Even a model with minimal biblical exposure should retrieve this given the heavy lexical cueing. 'Stones' as a top-3 prediction would suggest generic prophetic-address conflation.", "surface_confounder": "" }, { "id": "CAN_035", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "I will sprinkle [MASK] water on you, and you shall be clean from all your uncleannesses.", "targets": [ "clean" ], "foils": [], "acceptable_alternatives": [ "pure" ], "failure_examples": [ "bitter", "hot", "deep", "salt", "still" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ezekiel 36:25", "reasoning": "Ezekiel 36:25 reads 'clean water' in both the ESV and the NASB. The adjective modifying 'water' must align with the purification semantic field. 
A model predicting 'bitter' or 'salt' has learned water-modifier patterns from other biblical contexts (Marah, Dead Sea) but not the cleansing ritual register of Ezekiel 36.", "surface_confounder": "holy" }, { "id": "CAN_036", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Come from the four [MASK], O breath, and breathe on these slain, that they may live.", "targets": [ "winds" ], "foils": [], "acceptable_alternatives": [ "corners" ], "failure_examples": [ "rivers", "mountains", "seas", "gates", "tribes" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ezekiel 37:9", "reasoning": "The command to the breath/wind (ruach) to come from the four winds is the second prophetic act in the dry bones sequence. 'Winds' is the ESV/NASB canonical term. 'Corners' captures the directional sense but is a different translation tradition. A model predicting 'rivers' or 'mountains' is defaulting to geographic nouns without encoding the wind/breath/spirit semantic cluster that is central to Ezekiel 37.", "surface_confounder": "directions" }, { "id": "CAN_037", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "And I will set up over them one [MASK], my servant David, and he shall feed them.", "targets": [ "shepherd" ], "foils": [], "acceptable_alternatives": [ "king" ], "failure_examples": [ "priest", "prophet", "judge", "warrior", "angel" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Ezekiel 34:23", "reasoning": "Ezekiel 34:23 uses 'shepherd' rather than 'king' in the ESV/NASB for the Davidic figure. This is notable because the chapter has been condemning false shepherds \u2014 so the messianic promise deliberately reuses the shepherd role. A model predicting 'king' has grasped the Davidic domain but missed the shepherd-specific framing that Ezekiel intentionally employs. 
'Priest' or 'prophet' would indicate broader messianic category confusion.", "surface_confounder": "king" }, { "id": "CAN_038", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "I will make them one [MASK] in the land, on the mountains of Israel, and one king shall be king over them all.", "targets": [ "nation" ], "foils": [], "acceptable_alternatives": [ "people" ], "failure_examples": [ "army", "temple", "city", "tribe", "camp" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ezekiel 37:22", "reasoning": "The two-sticks-becoming-one prophecy in Ezekiel 37:15-28 culminates in national reunification. 'Nation' is the ESV canonical term. 'People' is an acceptable alternative found in some translations. 'Tribe' is a confounder because the passage is precisely about overcoming tribal division (Judah vs. Joseph/Ephraim). A model predicting 'tribe' has learned the Israelite political vocabulary but inverted the reunification logic.", "surface_confounder": "tribe" }, { "id": "CAN_039", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "And I will remove the heart of [MASK] from your flesh and give you a heart of flesh.", "targets": [ "stone" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "iron", "ice", "pride", "sin", "darkness" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Ezekiel 36:26", "reasoning": "This is the second half of Ezekiel 36:26, testing the stone/flesh contrast from the canonical text directly. While DOC_052 frames this doctrinally, this test requires exact word recall. 'Iron' is a plausible hardness synonym but is not the biblical term. 
'Sin' or 'pride' would indicate the model is interpreting the metaphor rather than recalling the canonical language.", "surface_confounder": "iron" }, { "id": "CON_043", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "Ezekiel teaches that the new heart is a sovereign gift of divine [MASK], not a product of human moral effort.", "targets": [ "grace" ], "foils": [ "merit" ], "acceptable_alternatives": [ "mercy" ], "failure_examples": [ "wrath", "power", "law", "anger", "silence" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ezekiel 36:26-27", "reasoning": "The grace/merit distinction is the crux of monergistic soteriology. Ezekiel 36 grounds regeneration entirely in God's initiative ('I will give... I will put... I will cause'). The foil 'merit' represents a synergistic misreading where the new heart is earned. A model that ranks 'merit' above 'grace' has failed to encode the unilateral divine agency that pervades the passage.", "surface_confounder": "merit" }, { "id": "CON_044", "type": "contrastive_theology", "category": "theology_proper", "difficulty": "hard", "input": "God's promise to restore Israel in Ezekiel 36 is motivated by concern for his holy [MASK], not by Israel's deserving.", "targets": [ "name" ], "foils": [ "people" ], "acceptable_alternatives": [], "failure_examples": [ "land", "temple", "city", "law", "throne" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ezekiel 36:22", "reasoning": "Ezekiel 36:22 explicitly states 'It is not for your sake... but for the sake of my holy name.' The foil 'people' represents the anthropocentric misreading where God acts primarily for human benefit. 
A model ranking 'people' above 'name' has not encoded the theocentric motivation that Ezekiel uniquely emphasizes \u2014 God restores Israel to vindicate his own reputation among the nations.", "surface_confounder": "people" }, { "id": "CON_045", "type": "contrastive_theology", "category": "ecclesiology", "difficulty": "medium", "input": "In Ezekiel 34, the LORD condemns shepherds who [MASK] themselves instead of tending the sheep.", "targets": [ "fed" ], "foils": [ "sacrificed" ], "acceptable_alternatives": [ "feed" ], "failure_examples": [ "blessed", "humbled", "purified", "healed", "taught" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ezekiel 34:2-3", "reasoning": "Ezekiel 34:2 asks 'Should not shepherds feed the flock?' and condemns those who feed themselves. The foil 'sacrificed' inverts the charge \u2014 self-sacrifice would be virtuous. A model ranking 'sacrificed' above 'fed' has confused the Ezekiel 34 indictment (self-serving leadership) with an approval of pastoral self-denial. This tests whether the model encodes the exploitative vs. self-giving shepherd contrast.", "surface_confounder": "sacrificed" }, { "id": "CON_046", "type": "contrastive_theology", "category": "hamartiology", "difficulty": "medium", "input": "Ezekiel 18 teaches that each person is judged for their own [MASK], not punished for the guilt of their parents.", "targets": [ "sin" ], "foils": [ "fate" ], "acceptable_alternatives": [ "sins", "guilt" ], "failure_examples": [ "nature", "birth", "blood", "tribe", "name" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ezekiel 18:20", "reasoning": "Ezekiel 18 dismantles the proverb about children suffering for parental sin. The foil 'fate' represents fatalistic or deterministic misreadings where judgment is arbitrary rather than moral. A model ranking 'fate' above 'sin' reveals it has not encoded the individual-moral-accountability framework that Ezekiel 18 establishes. 
'Nature' would indicate confusion with original sin debates rather than the specific Ezekiel context of individual retribution.", "surface_confounder": "fate" }, { "id": "CON_047", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "The new covenant in Ezekiel involves an internal transformation of the [MASK], not merely external obedience to written commands.", "targets": [ "heart" ], "foils": [ "behavior" ], "acceptable_alternatives": [ "soul", "will" ], "failure_examples": [ "body", "mind", "church", "world", "land" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ezekiel 36:26-27", "reasoning": "The internal/external contrast is central to Ezekiel's new covenant theology. The foil 'behavior' represents a moralistic reduction where covenant renewal means improved conduct rather than ontological heart-change. A model ranking 'behavior' above 'heart' shows it has learned the obedience domain but not the Ezekiel-specific claim that the locus of transformation is the heart itself, prior to and causative of behavioral change.", "surface_confounder": "behavior" }, { "id": "CON_048", "type": "contrastive_theology", "category": "theology_proper", "difficulty": "medium", "input": "In Ezekiel 43, God's [MASK] returns to the eschatological temple, reversing the departure of chapters 10-11.", "targets": [ "glory" ], "foils": [ "wrath" ], "acceptable_alternatives": [], "failure_examples": [ "prophet", "fire", "cloud", "sword", "angel" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ezekiel 43:1-5", "reasoning": "The glory-return in Ezekiel 43 is the structural climax of the book \u2014 what departed in judgment now returns in restoration. The foil 'wrath' tests whether the model associates God's return to the temple with judgment rather than restoration. 
A model ranking 'wrath' above 'glory' has not learned Ezekiel's narrative arc where the temple-filling event is restorative, not punitive.", "surface_confounder": "wrath" }, { "id": "CON_049", "type": "contrastive_theology", "category": "eschatology", "difficulty": "medium", "input": "Ezekiel 37's prophecy of two sticks becoming one signifies national [MASK] under one Davidic king.", "targets": [ "unity" ], "foils": [ "division" ], "acceptable_alternatives": [ "reunion", "union" ], "failure_examples": [ "judgment", "conquest", "exile", "destruction", "war" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ezekiel 37:22", "reasoning": "The two-sticks sign act (Ezekiel 37:15-28) depicts Judah and Ephraim reunified. The foil 'division' is the exact opposite of the prophecy's thrust but is contextually adjacent \u2014 the passage discusses the historical division as background. A model ranking 'division' above 'unity' reveals it has latched onto the problem-statement rather than the prophetic resolution. This tests whether the model encodes the directional movement of the oracle.", "surface_confounder": "division" }, { "id": "CON_050", "type": "contrastive_theology", "category": "pneumatology", "difficulty": "hard", "input": "In Ezekiel 37, the breath entering the dry bones represents the [MASK] giving life, not mere physical resuscitation.", "targets": [ "spirit" ], "foils": [ "wind" ], "acceptable_alternatives": [], "failure_examples": [ "rain", "sun", "blood", "medicine", "food" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ezekiel 37:9-10", "reasoning": "The Hebrew ruach means both 'wind' and 'spirit,' creating an intentional theological pun in Ezekiel 37. The foil 'wind' tests whether the model resolves the ruach ambiguity toward its theological-pneumatological sense or its meteorological sense. 
A model ranking 'wind' above 'spirit' has learned the surface-level nature imagery but missed the prophetic claim that divine Spirit \u2014 not natural force \u2014 is the agent of resurrection life.", "surface_confounder": "wind" }, { "id": "CON_051", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "The promised Davidic figure in Ezekiel 34 is called [MASK] rather than king, emphasizing care over dominion.", "targets": [ "shepherd" ], "foils": [ "ruler" ], "acceptable_alternatives": [], "failure_examples": [ "warrior", "priest", "prophet", "judge", "lord" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ezekiel 34:23", "reasoning": "Ezekiel 34:23 deliberately titles the messianic Davidic figure 'shepherd' rather than 'king' or 'ruler.' The foil 'ruler' represents the political-messianic reading that emphasizes sovereignty over nurture. A model ranking 'ruler' above 'shepherd' has encoded generic messianic expectation but not Ezekiel's distinctive pastoral reframing of Davidic hope. The sentence structure ('called X rather than king') provides context that should disambiguate toward 'shepherd.'", "surface_confounder": "ruler" }, { "id": "DOC_061", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "If we confess our sins, he is faithful and just to [MASK] us our sins and to cleanse us from all unrighteousness.", "targets": [ "forgive" ], "foils": [], "acceptable_alternatives": [ "pardon" ], "failure_examples": [ "punish", "charge", "remember", "count", "judge" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 John 1:9", "reasoning": "This tests whether the model has learned the conditional promise structure of 1 John 1:9. The co-occurrence of 'confess', 'faithful and just', and 'cleanse' should strongly activate 'forgive' as the soteriological response. 
A model predicting 'punish' or 'judge' reveals inverted doctrinal polarity \u2014 confusing confession-as-remedy with confession-as-condemnation.", "surface_confounder": "judge" }, { "id": "DOC_062", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "easy", "input": "God is [MASK], and whoever abides in this abides in God, and God abides in him.", "targets": [ "love" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "power", "wrath", "truth", "spirit", "great" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "1 John 4:16", "reasoning": "1 John 4:8 and 4:16 both declare 'God is love' \u2014 a defining ontological claim of the Johannine epistles. The surrounding context of mutual abiding makes this strongly constrained. A model predicting 'spirit' (from John 4:24) reveals cross-corpus bleed without epistle-specific precision. 'Wrath' in top-3 would indicate catastrophic doctrinal inversion.", "surface_confounder": "spirit" }, { "id": "DOC_063", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "We have an [MASK] with the Father, Jesus Christ the righteous.", "targets": [ "advocate" ], "foils": [], "acceptable_alternatives": [ "intercessor" ], "failure_examples": [ "enemy", "problem", "issue", "agreement", "argument" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 John 2:1", "reasoning": "This tests the model's grasp of Christ's intercessory role in Johannine theology. The paracletic function ('advocate' = parakletos) is distinctive to Johannine usage. The phrase 'with the Father' plus 'Jesus Christ the righteous' should constrain the slot to a mediatorial term. 
'Agreement' would indicate the model reads 'with the Father' as relational symmetry rather than forensic representation.", "surface_confounder": "agreement" }, { "id": "DOC_064", "type": "doctrinal_association", "category": "soteriology", "difficulty": "hard", "input": "In this the love of God was made manifest among us, that God sent his only Son into the world so that we might [MASK] through him.", "targets": [ "live" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "die", "suffer", "work", "learn", "pray" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "1 John 4:9", "reasoning": "The purpose clause 'so that we might [MASK] through him' tests whether the model associates the incarnation with vivification. 'Live' is the ESV/NASB word and carries soteriological weight \u2014 life through Christ is a core Johannine motif (cf. John 10:10, 20:31). The difficulty is that several generic verbs could fill this slot syntactically; only theological precision selects 'live' at high rank.", "surface_confounder": "believe" }, { "id": "DOC_065", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "medium", "input": "Everyone who makes a practice of sinning also practices [MASK], for sin is lawlessness.", "targets": [ "lawlessness" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "religion", "worship", "prayer", "magic", "violence" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 John 3:4", "reasoning": "This tests the Johannine definition of sin as anomia (lawlessness). The sentence structure is nearly tautological \u2014 the second clause defines the first \u2014 so the model must have encoded the specific lexical association between sin and lawlessness. 'Lawlessness' is a single token in BERT and theologically precise. 
Predicting 'evil' or 'wickedness' would show generic moral vocabulary without the specific Johannine hamartiological formulation.", "surface_confounder": "evil" }, { "id": "DOC_066", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "medium", "input": "Do not believe every spirit, but [MASK] the spirits to see whether they are from God.", "targets": [ "test" ], "foils": [], "acceptable_alternatives": [ "try", "examine" ], "failure_examples": [ "worship", "follow", "summon", "fear", "praise" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 John 4:1", "reasoning": "1 John 4:1 introduces the imperative of spiritual discernment \u2014 dokimazete (test/prove). The adversative 'do not believe every spirit, but' strongly constrains the next verb to an evaluative action. A model predicting 'follow' or 'worship' has failed to learn the discernment imperative and instead defaults to devotional language. 'Fear' would indicate a demonological rather than epistemological reading.", "surface_confounder": "follow" }, { "id": "DOC_067", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "easy", "input": "God is [MASK], and in him is no darkness at all.", "targets": [ "light" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "power", "anger", "fire", "silent", "hidden" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "1 John 1:5", "reasoning": "The light/darkness antithesis is the foundational ethical and ontological framework of 1 John. 'God is light' parallels 'God is love' as one of the epistle's two defining divine-attribute declarations. The apophatic second clause ('no darkness at all') makes the slot unambiguous. 
Failure here indicates minimal Johannine corpus exposure.", "surface_confounder": "" }, { "id": "DOC_068", "type": "doctrinal_association", "category": "christology", "difficulty": "hard", "input": "The [MASK] of Jesus his Son cleanses us from all sin.", "targets": [ "blood" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "name", "word", "power", "hand", "spirit" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "1 John 1:7", "reasoning": "This tests whether the model has internalized the atonement theology of 1 John 1:7. The specific instrument of cleansing is 'blood' \u2014 not the generic 'name' or 'power'. A model ranking 'name' above 'blood' has learned Christological vocabulary broadly but not the sacrificial-atonement framework. 'Spirit' as top-1 would suggest pneumatological confusion with sanctification.", "surface_confounder": "name" }, { "id": "DOC_069", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "See what kind of love the Father has given to us, that we should be called [MASK] of God; and so we are.", "targets": [ "children" ], "foils": [], "acceptable_alternatives": [ "sons" ], "failure_examples": [ "servants", "enemies", "slaves", "prophets", "priests" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 John 3:1", "reasoning": "Adoption language is central to Johannine soteriology. The context of lavish paternal love ('what kind of love the Father has given') constrains the relational term to filial language. A model predicting 'servants' has encoded a hierarchical rather than adoptive divine-human relationship. 
The confirmatory phrase 'and so we are' reinforces the reality of the status, not mere title.", "surface_confounder": "servants" }, { "id": "DOC_070", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "hard", "input": "If we say we have no [MASK], we deceive ourselves, and the truth is not in us.", "targets": [ "sin" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "hope", "faith", "love", "fear", "doubt" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "1 John 1:8", "reasoning": "This tests the model's encoding of Johannine anthropology \u2014 the universal reality of sin. The self-deception frame ('we deceive ourselves') paired with the epistemic claim ('truth is not in us') makes this a hamartiological diagnostic. Difficulty is hard because syntactically many abstract nouns fit after 'have no'; only theological precision yields 'sin'. A model predicting 'hope' or 'faith' has defaulted to devotional filler.", "surface_confounder": "hope" }, { "id": "CAN_040", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "For this is the love of God, that we keep his [MASK].", "targets": [ "commandments" ], "foils": [], "acceptable_alternatives": [ "commands" ], "failure_examples": [ "secrets", "promises", "treasures", "records" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "1 John 5:3", "reasoning": "The love-obedience link is a signature Johannine theme (cf. John 14:15). 'Keep his commandments' is a high-frequency collocation in the Johannine corpus. The canonical word is unambiguous. 
Failure here suggests the model has not absorbed the basic Johannine vocabulary of love-as-obedience.", "surface_confounder": "" }, { "id": "CAN_041", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "There is no [MASK] in love, but perfect love casts it out.", "targets": [ "fear" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "hope", "peace", "joy", "pain", "truth" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 John 4:18", "reasoning": "1 John 4:18 states the love-fear antithesis. The pronominal 'casts it out' creates a coreference chain that should reinforce the masked word. The canonical word is 'fear'. A model predicting 'pain' or 'joy' has failed to encode the specific Johannine opposition between love and fear, defaulting to emotionally salient terms instead.", "surface_confounder": "pain" }, { "id": "CAN_042", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "This is the victory that has overcome the world \u2014 our [MASK].", "targets": [ "faith" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "strength", "army", "king", "sword", "courage" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 John 5:4", "reasoning": "The appositive dash structure ('the victory... \u2014 our faith') is a defining formulation of 1 John 5:4. The military metaphor ('overcome the world') could plausibly attract 'strength' or 'courage', but the canonical text specifies 'faith'. 
A model predicting martial vocabulary reveals it has learned the metaphor's domain but not the Johannine redefinition of victory as faith.", "surface_confounder": "strength" }, { "id": "CAN_043", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "We love because he [MASK] loved us.", "targets": [ "first" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "never", "always", "also", "once", "truly" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "1 John 4:19", "reasoning": "1 John 4:19 is a concise statement of prevenient divine love \u2014 God's love is logically and temporally prior. 'First' is the canonical word and establishes the causal priority of divine love. A model predicting 'always' or 'truly' captures the sentiment but misses the temporal-priority claim that is theologically distinctive.", "surface_confounder": "always" }, { "id": "CAN_044", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "That which was from the beginning, which we have heard, which we have seen with our eyes, which we looked upon and have [MASK] with our hands, concerning the word of life.", "targets": [ "touched" ], "foils": [], "acceptable_alternatives": [ "handled" ], "failure_examples": [ "built", "written", "fought", "broken", "carried" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "1 John 1:1", "reasoning": "The prologue of 1 John establishes the empirical witness of the incarnation via an ascending sequence of sensory verbs: heard, seen, looked upon, touched. The canonical word completing the tactile climax is 'touched' (or 'handled' in KJV/NKJV). A model must have encoded the specific sensory-witness vocabulary of the Johannine prologue. 
Generic manual verbs like 'built' or 'carried' indicate the model reads 'hands' as instrumental rather than sensory.", "surface_confounder": "held" }, { "id": "CAN_045", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Do not love the [MASK] or the things in it. If anyone loves it, the love of the Father is not in him.", "targets": [ "world" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "church", "law", "flesh", "temple", "money" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 John 2:15", "reasoning": "1 John 2:15 introduces the world-love antithesis \u2014 a central ethical imperative of the epistle. 'World' (kosmos) is the canonical term and should be strongly activated by the parallel structure 'or the things in it'. The pronominal back-reference ('If anyone loves it') further constrains the answer. A model predicting 'flesh' has accessed Pauline rather than Johannine ethical vocabulary.", "surface_confounder": "flesh" }, { "id": "CAN_046", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Beloved, let us love one another, for love is from [MASK].", "targets": [ "god" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "heaven", "above", "within", "nature", "birth" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 John 4:7", "reasoning": "1 John 4:7 grounds the love command in divine origin. The source formula 'love is from [X]' should resolve to 'God' as the theological source. 
'Heaven' or 'above' would indicate spatial rather than personal attribution \u2014 the model would have learned that love has a transcendent source but missed that John attributes it personally to God.", "surface_confounder": "heaven" }, { "id": "CON_052", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "Every spirit that confesses that Jesus Christ has come in the [MASK] is from God.", "targets": [ "flesh" ], "foils": [ "spirit" ], "acceptable_alternatives": [], "failure_examples": [ "clouds", "end", "night", "world", "fire" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 John 4:2", "reasoning": "This is the central christological test of 1 John \u2014 the anti-docetic confession. The foil 'spirit' represents the exact docetic error John is combating: that Christ came as spirit rather than taking on real flesh. A model ranking 'spirit' above 'flesh' has internalized the docetic heresy rather than Johannine orthodoxy. This is hard because 'spirit' co-occurs frequently with christological contexts.", "surface_confounder": "spirit" }, { "id": "CON_053", "type": "contrastive_theology", "category": "christology", "difficulty": "medium", "input": "Many [MASK] have gone out into the world, those who do not confess the coming of Jesus Christ in the flesh.", "targets": [ "deceivers" ], "foils": [ "prophets" ], "acceptable_alternatives": [], "failure_examples": [ "angels", "kings", "soldiers", "priests", "teachers" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 John 1:7", "reasoning": "2 John 7 labels those who deny the incarnation as 'deceivers' (planoi). The foil 'prophets' tests whether the model confuses the warning in 1 John 4:1 about 'false prophets' with the specific labeling in 2 John. Both are Johannine, but the specific term here is 'deceivers'. 
A model ranking 'prophets' higher reveals it has blurred the distinction between the two related but different Johannine warnings.", "surface_confounder": "prophets" }, { "id": "CON_054", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "He is faithful and [MASK] to forgive us our sins.", "targets": [ "just" ], "foils": [ "merciful" ], "acceptable_alternatives": [ "righteous" ], "failure_examples": [ "willing", "able", "slow", "eager", "quick" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 John 1:9", "reasoning": "The theological surprise of 1 John 1:9 is that God's forgiveness is grounded in justice ('just'), not merely mercy. The foil 'merciful' represents a common but theologically imprecise substitution \u2014 while God is merciful, John's point is that forgiveness is a matter of divine faithfulness to the covenant and the satisfaction of justice through Christ's atonement. A model ranking 'merciful' above 'just' has missed this distinctive Reformed/Johannine emphasis.", "surface_confounder": "merciful" }, { "id": "CON_055", "type": "contrastive_theology", "category": "theology_proper", "difficulty": "medium", "input": "God is [MASK], and whoever abides in this abides in God.", "targets": [ "love" ], "foils": [ "just" ], "acceptable_alternatives": [], "failure_examples": [ "power", "wrath", "knowledge", "fury", "nature" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 John 4:16", "reasoning": "While God is indeed just (1 John 1:9), the defining ontological predication in this passage is 'God is love'. The foil 'just' tests whether the model can distinguish between divine attributes that are contextually correct. The mutual-abiding language ('whoever abides in this abides in God') is specifically tied to love in 1 John, not to justice. 
Ranking 'just' above 'love' here reveals context-insensitive attribute retrieval.", "surface_confounder": "just" }, { "id": "CON_056", "type": "contrastive_theology", "category": "hamartiology", "difficulty": "medium", "input": "No one who abides in him keeps on [MASK]; no one who keeps on doing this has either seen him or known him.", "targets": [ "sinning" ], "foils": [ "believing" ], "acceptable_alternatives": [], "failure_examples": [ "praying", "hoping", "living", "working", "growing" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 John 3:6", "reasoning": "1 John 3:6 states the incompatibility of abiding in Christ and habitual sin. The foil 'believing' inverts the Johannine logic \u2014 in John's framework, abiding and believing are positively correlated, not opposed. A model ranking 'believing' above 'sinning' has failed to learn the sin-abiding antithesis that structures 1 John 3. The present-tense 'keeps on' signals habitual action, and the negative frame demands a negative behavior.", "surface_confounder": "believing" }, { "id": "CON_057", "type": "contrastive_theology", "category": "pneumatology", "difficulty": "hard", "input": "By this we know the spirit of [MASK] and the spirit of error.", "targets": [ "truth" ], "foils": [ "god" ], "acceptable_alternatives": [], "failure_examples": [ "man", "love", "power", "light", "life" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 John 4:6", "reasoning": "1 John 4:6 contrasts 'the spirit of truth' with 'the spirit of error' \u2014 an epistemological pneumatological distinction. The foil 'God' is plausible because 'spirit of God' is a common biblical collocation and appears earlier in the same chapter (4:2). However, the parallel structure with 'spirit of error' demands the antonym 'truth', not the asymmetric 'God'. 
A model ranking 'God' higher has defaulted to the higher-frequency collocation without attending to the contrastive parallelism.", "surface_confounder": "god" }, { "id": "CON_058", "type": "contrastive_theology", "category": "christology", "difficulty": "medium", "input": "Who is the liar but he who [MASK] that Jesus is the Christ?", "targets": [ "denies" ], "foils": [ "claims" ], "acceptable_alternatives": [ "denied" ], "failure_examples": [ "says", "knows", "believes", "proves", "teaches" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 John 2:22", "reasoning": "1 John 2:22 identifies the antichrist as the one who denies Jesus's messiahship. The foil 'claims' tests whether the model understands the polarity of the sentence \u2014 'the liar' is a pejorative frame, so the verb must be adversative. A model ranking 'claims' above 'denies' has misread the christological polemic as a neutral identification statement. 'Believes' or 'says' as top predictions would indicate complete loss of the adversarial frame.", "surface_confounder": "claims" }, { "id": "CON_059", "type": "contrastive_theology", "category": "theology_proper", "difficulty": "easy", "input": "We have come to know and to believe the love that God has for us. God is [MASK].", "targets": [ "love" ], "foils": [ "holy" ], "acceptable_alternatives": [], "failure_examples": [ "dead", "angry", "silent", "distant", "absent" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 John 4:16", "reasoning": "The preceding clause explicitly names 'the love that God has for us', making the ontological predication 'God is love' contextually overdetermined. The foil 'holy' tests whether the model defaults to a generic but true divine attribute over the contextually specified one. While God's holiness is orthodox, this specific context demands 'love'. 
A model ranking 'holy' above 'love' here has poor contextual sensitivity despite broad theological correctness.", "surface_confounder": "holy" }, { "id": "DOC_071", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "Paul writes to the Galatians that a person is not [MASK] by works of the law but through faith in Jesus Christ.", "targets": [ "justified" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "condemned", "punished", "known", "helped", "tested" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Galatians 2:16", "reasoning": "Galatians 2:16 is the thesis statement of the entire epistle. The forensic term 'justified' must be activated by the co-occurrence of 'works of the law' and 'faith in Jesus Christ.' A model predicting 'saved' has learned soteriological domain but not the precise Pauline legal metaphor. 'Condemned' would indicate inverted polarity on the law-gospel axis.", "surface_confounder": "saved" }, { "id": "DOC_072", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "In Galatians, Paul teaches that the law served as a [MASK] until Christ came, so that we might be justified by faith.", "targets": [ "guardian" ], "foils": [], "acceptable_alternatives": [ "tutor", "guide" ], "failure_examples": [ "burden", "curse", "barrier", "punishment", "weapon" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Galatians 3:24", "reasoning": "The paidag\u014dgos metaphor in Galatians 3:24 is Paul's key image for the law's temporary, supervisory role before Christ. 'Guardian' or 'tutor' reflects this custodial function. 
A model predicting 'curse' has conflated the law-as-guardian passage with the separate curse-of-the-law passage in 3:13, revealing shallow thematic blending rather than passage-level precision.", "surface_confounder": "curse" }, { "id": "DOC_073", "type": "doctrinal_association", "category": "christology", "difficulty": "hard", "input": "Paul declares that he has been [MASK] with Christ; it is no longer he who lives, but Christ who lives in him.", "targets": [ "crucified" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "raised", "buried", "united", "blessed", "born" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Galatians 2:20", "reasoning": "Galatians 2:20 is one of the most memorized Pauline verses. The union-with-Christ language here is specifically cruciform \u2014 'crucified with Christ' \u2014 not generic. A model predicting 'raised' or 'united' has learned the broader participatory soteriology but missed the specific death-identification that Paul foregrounds. The 'no longer he who lives' clause should strongly activate the death/crucifixion semantic field.", "surface_confounder": "united" }, { "id": "DOC_074", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "easy", "input": "But the [MASK] of the Spirit is love, joy, peace, patience, kindness, goodness, faithfulness, gentleness, and self-control.", "targets": [ "fruit" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "gift", "power", "work", "sign", "law" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Galatians 5:22-23", "reasoning": "The nine-fold 'fruit of the Spirit' catalogue in Galatians 5:22-23 is among the most widely taught passages in the NT. The singular 'fruit' (not 'fruits' or 'gifts') is theologically significant \u2014 it denotes a unified character. 
A model predicting 'gift' has conflated this with 1 Corinthians 12 charismata lists, revealing cross-book contamination rather than passage-specific encoding.", "surface_confounder": "gift" }, { "id": "DOC_075", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "For you have been called to [MASK], brothers; only do not use your freedom as an opportunity for the flesh.", "targets": [ "freedom" ], "foils": [], "acceptable_alternatives": [ "liberty" ], "failure_examples": [ "judgment", "law", "service", "suffering", "obedience" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Galatians 5:13", "reasoning": "Galatians 5:13 pivots the epistle from doctrinal argument to ethical exhortation, and freedom is the hinge concept. The sentence structure with 'called to [MASK]' followed by a warning against misusing that same concept should strongly activate 'freedom.' A model predicting 'obedience' or 'service' has collapsed the Pauline freedom-to-serve paradox into a single pole.", "surface_confounder": "obedience" }, { "id": "DOC_076", "type": "doctrinal_association", "category": "soteriology", "difficulty": "hard", "input": "Christ [MASK] us from the curse of the law by becoming a curse for us.", "targets": [ "redeemed" ], "foils": [], "acceptable_alternatives": [ "freed" ], "failure_examples": [ "condemned", "judged", "removed", "taught", "tested" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Galatians 3:13", "reasoning": "Galatians 3:13 uses the commercial-legal metaphor of redemption (exagoraz\u014d). 'Redeemed' captures the purchase/ransom motif that Paul embeds in curse-bearing substitutionary language. 'Freed' is acceptable but less precise. 'Saved' would indicate domain knowledge without metaphor specificity. 
'Condemned' as a top prediction would represent complete doctrinal inversion.", "surface_confounder": "saved" }, { "id": "DOC_077", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "medium", "input": "There is neither Jew nor Greek, slave nor free, male nor female, for you are all [MASK] in Christ Jesus.", "targets": [ "one" ], "foils": [], "acceptable_alternatives": [ "equal", "united" ], "failure_examples": [ "saved", "holy", "chosen", "lost", "blessed" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Galatians 3:28", "reasoning": "Galatians 3:28 is Paul's strongest statement of baptismal unity across social divisions. 'One' is the canonical term and carries the weight of corporate incorporation into Christ's body. A model predicting 'saved' or 'blessed' has learned positive valence in Christ-language but missed the specific unity-in-diversity claim that is the verse's theological contribution.", "surface_confounder": "saved" }, { "id": "DOC_078", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "medium", "input": "The desires of the [MASK] are against the Spirit, and the desires of the Spirit are against the flesh, for these are opposed to each other.", "targets": [ "flesh" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "world", "heart", "mind", "soul", "body" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Galatians 5:17", "reasoning": "Galatians 5:17 articulates the flesh-Spirit antithesis that structures Paul's ethical theology. 'Flesh' (sarx) is the technical Pauline term for the sin-oriented disposition. 'Body' would indicate the model has collapsed sarx into s\u014dma \u2014 a historically significant theological confusion (Gnostic body-hatred vs. Pauline sin-nature). 
'World' would suggest Johannine cross-contamination.", "surface_confounder": "body" }, { "id": "DOC_079", "type": "doctrinal_association", "category": "soteriology", "difficulty": "hard", "input": "God sent forth his Son so that we might receive [MASK] as sons through the Spirit.", "targets": [ "adoption" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "salvation", "blessing", "power", "mercy", "forgiveness" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Galatians 4:5", "reasoning": "Galatians 4:4-5 grounds the huiothesia (adoption) motif in the sending of the Son. 'Adoption' is the precise soteriological category \u2014 distinct from justification, sanctification, or generic salvation. A model predicting 'salvation' has captured the domain but not the specific ordo salutis locus. Correctly predicting 'adoption' demonstrates the model has learned the filial dimension of Galatians' soteriology.", "surface_confounder": "salvation" }, { "id": "DOC_080", "type": "doctrinal_association", "category": "eschatology", "difficulty": "easy", "input": "Do not be deceived: God is not mocked, for whatever one [MASK] that will he also reap.", "targets": [ "sows" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "does", "says", "wants", "thinks", "gives" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Galatians 6:7", "reasoning": "The sowing-reaping metaphor in Galatians 6:7 is proverbial and widely quoted. The collocational force of 'sow' and 'reap' in the same sentence should make this nearly automatic for any model with agricultural or biblical exposure. 
Failure to predict 'sows' in the top-10 would suggest the model lacks basic proverbial/agricultural metaphor encoding.", "surface_confounder": "" }, { "id": "CAN_047", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "I am astonished that you are so quickly deserting him who called you in the [MASK] of Christ.", "targets": [ "grace" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "law", "power", "truth", "way", "light" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Galatians 1:6", "reasoning": "Galatians 1:6 opens the epistle with Paul's characteristic rebuke \u2014 the Galatians are deserting the grace of Christ for a different gospel. 'Grace' is the canonical term and the thematic keyword of the entire letter. A model predicting 'law' here would have inverted the Galatians polarity entirely.", "surface_confounder": "name" }, { "id": "CAN_048", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "For am I now seeking the approval of man, or of God? If I were still trying to [MASK] man, I would not be a servant of Christ.", "targets": [ "please" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "judge", "save", "teach", "follow", "fear" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Galatians 1:10", "reasoning": "Galatians 1:10 establishes Paul's independence from human approval. The exact verb 'please' is canonical across ESV and NASB. 'Serve' would be semantically adjacent but wrong for this verse. A model that predicts 'please' has internalized the specific people-pleasing rejection that Paul uses to defend his apostolic authority.", "surface_confounder": "serve" }, { "id": "CAN_049", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "I have been crucified with Christ. 
It is no longer I who live, but [MASK] who lives in me.", "targets": [ "christ" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "god", "spirit", "he", "someone", "another" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Galatians 2:20", "reasoning": "Galatians 2:20b names Christ explicitly as the one who now lives in the believer. While 'God' or 'Spirit' are theologically coherent candidates, the canonical text specifically repeats 'Christ' to reinforce the christological union. A model that predicts 'christ' demonstrates verse-level canonical recall, not just theological domain association.", "surface_confounder": "god" }, { "id": "CAN_050", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "And because you are sons, God has sent the [MASK] of his Son into our hearts, crying 'Abba! Father!'", "targets": [ "spirit" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "word", "love", "power", "grace", "light" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Galatians 4:6", "reasoning": "Galatians 4:6 is one of the clearest Trinitarian texts in Paul \u2014 the Father sends the Spirit of the Son. 'Spirit' is the canonical term. The difficulty is hard because 'love' and 'grace' are frequent in similar Pauline constructions. 
A correct top-3 prediction shows the model has encoded the specific pneumatological claim of this verse, not just generic divine-attribute language.", "surface_confounder": "love" }, { "id": "CAN_051", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "For freedom Christ has set us [MASK]; stand firm therefore, and do not submit again to a yoke of slavery.", "targets": [ "free" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "apart", "aside", "right", "straight", "above" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Galatians 5:1", "reasoning": "Galatians 5:1 is the programmatic declaration of Christian freedom that opens the paraenetic section. 'Free' is the canonical word and is heavily reinforced by the 'freedom' at the start of the sentence and 'slavery' at the end. This is easy because the surrounding lexical field makes any other prediction nearly impossible for a model with basic semantic competence.", "surface_confounder": "" }, { "id": "CAN_052", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Bear one another's [MASK], and so fulfill the law of Christ.", "targets": [ "burdens" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "sins", "needs", "debts", "sorrows", "faults" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Galatians 6:2", "reasoning": "Galatians 6:2 uses 'burdens' (bar\u0113) as the canonical term for mutual care within the community. 'Sins' would indicate confusion with James 5:16 confessional language. 'Needs' is too generic. 
The collocation 'bear one another's ___' is a strong canonical signal that should point to 'burdens' specifically in a model with Galatians exposure.", "surface_confounder": "sins" }, { "id": "CON_060", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "Paul insists that a person is justified by [MASK], not by works of the law.", "targets": [ "faith" ], "foils": [ "works" ], "acceptable_alternatives": [], "failure_examples": [ "law", "merit", "effort", "deeds", "obedience" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Galatians 2:16", "reasoning": "This tests the fundamental Galatian antithesis: faith vs. works-of-the-law. The foil 'works' is plausible because the sentence contains 'works of the law' \u2014 a model with shallow co-occurrence patterns might rank 'works' highly due to lexical attraction from later in the sentence. A model that ranks 'faith' above 'works' has learned the contrastive structure of Pauline justification, not just word frequency.", "surface_confounder": "works" }, { "id": "CON_061", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "In Galatians, Paul teaches that believers receive the promised Spirit through [MASK], not through obedience to the Mosaic code.", "targets": [ "faith" ], "foils": [ "obedience" ], "acceptable_alternatives": [ "believing" ], "failure_examples": [ "law", "works", "ritual", "merit", "circumcision" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Galatians 3:2", "reasoning": "Galatians 3:2 asks the Galatians whether they received the Spirit by works of the law or by hearing with faith. The foil 'obedience' is semantically close \u2014 faith involves a kind of obedience (Romans 1:5) \u2014 but in the Galatians context, Paul specifically contrasts Spirit reception through faith against law-keeping. 
A model that ranks 'faith' above 'obedience' has learned the Galatians-specific polarity.", "surface_confounder": "obedience" }, { "id": "CON_062", "type": "contrastive_theology", "category": "christology", "difficulty": "medium", "input": "According to Galatians, Abraham's offspring refers ultimately not to many descendants but to [MASK], who is Christ.", "targets": [ "one" ], "foils": [ "many" ], "acceptable_alternatives": [], "failure_examples": [ "all", "some", "israel", "jews", "nations" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Galatians 3:16", "reasoning": "Galatians 3:16 contains Paul's famous singular-vs-plural argument about Abraham's seed. The foil 'many' directly opposes Paul's argument that the promise points to one seed (Christ), not to many. A model ranking 'many' above 'one' has failed to encode Paul's christological reading of the Abrahamic promise, which is central to his argument against the Judaizers.", "surface_confounder": "many" }, { "id": "CON_063", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "Paul argues that if righteousness could come through the [MASK], then Christ died for nothing.", "targets": [ "law" ], "foils": [ "gospel" ], "acceptable_alternatives": [], "failure_examples": [ "cross", "church", "spirit", "world", "flesh" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Galatians 2:21", "reasoning": "Galatians 2:21 is Paul's reductio ad absurdum: if law-keeping produces righteousness, the cross is pointless. The foil 'gospel' tests whether the model understands the law-gospel antithesis \u2014 righteousness through the gospel is Paul's positive claim, but 'through the law' is what Paul rejects. 
A model ranking 'gospel' above 'law' has inverted the conditional logic of the verse.", "surface_confounder": "gospel" }, { "id": "CON_064", "type": "contrastive_theology", "category": "pneumatology", "difficulty": "medium", "input": "Paul exhorts the Galatians to walk by the [MASK] and they will not gratify the desires of the flesh.", "targets": [ "spirit" ], "foils": [ "flesh" ], "acceptable_alternatives": [], "failure_examples": [ "law", "world", "mind", "way", "truth" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Galatians 5:16", "reasoning": "Galatians 5:16 sets up the flesh-Spirit opposition that dominates the rest of the chapter. The foil 'flesh' is maximally contrastive \u2014 a model that ranks 'flesh' above 'spirit' in 'walk by the ___' has completely inverted Paul's ethical imperative. Since both words appear in nearly identical constructions throughout Galatians 5, the model must encode the directional polarity (walk by Spirit \u2192 resist flesh), not just co-occurrence.", "surface_confounder": "flesh" }, { "id": "CON_065", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "In Paul's allegory, believers are children of the [MASK] woman, born according to the promise, not children of the slave.", "targets": [ "free" ], "foils": [ "slave" ], "acceptable_alternatives": [], "failure_examples": [ "first", "holy", "elder", "rich", "young" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Galatians 4:31", "reasoning": "Galatians 4:21-31 allegorizes Sarah and Hagar as two covenants \u2014 freedom (promise) vs. slavery (law). The foil 'slave' tests whether the model can resolve the correct pole when both 'free' and 'slave' appear in the same semantic context. 
The phrase 'born according to the promise' should bias toward the free woman (Sarah), but 'slave' has strong lexical attraction from the surrounding Hagar narrative.", "surface_confounder": "slave" }, { "id": "CON_066", "type": "contrastive_theology", "category": "ecclesiology", "difficulty": "medium", "input": "Paul warns that those who accept [MASK] as necessary for salvation have fallen away from grace.", "targets": [ "circumcision" ], "foils": [ "baptism" ], "acceptable_alternatives": [], "failure_examples": [ "communion", "fasting", "tithing", "prayer", "sacrifice" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Galatians 5:2-4", "reasoning": "Galatians 5:2-4 addresses circumcision as the specific rite the Judaizers demanded. The foil 'baptism' tests whether the model distinguishes between the two initiatory rites \u2014 circumcision (old covenant marker rejected by Paul as salvific) and baptism (new covenant marker affirmed by Paul). A model ranking 'baptism' above 'circumcision' has confused the sign Paul attacks with the sign Paul endorses.", "surface_confounder": "baptism" }, { "id": "CON_067", "type": "contrastive_theology", "category": "hamartiology", "difficulty": "medium", "input": "Galatians teaches that the works of the flesh include sexual immorality, idolatry, and [MASK], which are contrary to the Spirit.", "targets": [ "sorcery" ], "foils": [ "prophecy" ], "acceptable_alternatives": [ "jealousy", "envy", "strife" ], "failure_examples": [ "prayer", "fasting", "worship", "teaching", "charity" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Galatians 5:19-20", "reasoning": "Galatians 5:19-20 lists sorcery (pharmakeia) among the works of the flesh alongside sexual immorality and idolatry. The foil 'prophecy' tests whether the model distinguishes vice lists from spiritual gift lists \u2014 prophecy appears in 1 Corinthians 12 as a gift of the Spirit. 
A model ranking 'prophecy' above 'sorcery' has contaminated the Galatians vice list with Corinthian charismata vocabulary.", "surface_confounder": "prophecy" }, { "id": "CON_068", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "Paul received the gospel not from any human source but through a direct [MASK] of Jesus Christ.", "targets": [ "revelation" ], "foils": [ "tradition" ], "acceptable_alternatives": [], "failure_examples": [ "teaching", "letter", "command", "vision", "prophecy" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Galatians 1:12", "reasoning": "Galatians 1:12 establishes that Paul's gospel came by apokalypsis (revelation), not by human transmission. The foil 'tradition' (paradosis) represents exactly the human-transmission category Paul rejects. This tests whether the model has encoded the revelation-vs-tradition contrast that grounds Paul's apostolic authority claim. A model ranking 'tradition' above 'revelation' has inverted the epistemological foundation of the epistle.", "surface_confounder": "tradition" }, { "id": "DOC_081", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "In the beginning was the [MASK], and the Word was with God, and the Word was God.", "targets": [ "word" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "light", "spirit", "law", "lord", "man" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "John 1:1", "reasoning": "The Logos prologue is the foundational christological text in John. The term 'Word' (Logos) as a title for the pre-incarnate Christ is unique to Johannine theology. A model that predicts 'spirit' or 'light' has captured Johannine vocabulary but not the specific Logos identification that opens the Gospel. 
Correct prediction demonstrates the model has internalized the prologue's distinctive christological formula.", "surface_confounder": "light" }, { "id": "DOC_082", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "And the Word became [MASK] and dwelt among us, and we have seen his glory.", "targets": [ "flesh" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "spirit", "light", "visible", "known", "real" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "John 1:14", "reasoning": "The incarnation formula 'the Word became flesh' is the climactic statement of the Johannine prologue. 'Flesh' (sarx) is the theologically precise term for the full material humanity Christ assumed. A model predicting 'man' or 'human' captures the general sense but misses the specific anti-docetic emphasis of sarx. 'Spirit' would indicate inverted christological polarity.", "surface_confounder": "man" }, { "id": "DOC_083", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "Jesus answered him, 'Truly, truly, I say to you, unless one is born [MASK], he cannot see the kingdom of God.'", "targets": [ "again" ], "foils": [], "acceptable_alternatives": [ "anew" ], "failure_examples": [ "righteous", "holy", "pure", "good", "sinless" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "John 3:3", "reasoning": "The concept of new birth (born again/from above) in the Nicodemus discourse is central to Johannine soteriology. The Greek anothen carries a deliberate ambiguity between 'again' and 'from above' that drives the dialogue. 
A model that predicts moral adjectives like 'righteous' or 'holy' reveals it has not learned the regeneration vocabulary distinctive to John 3.", "surface_confounder": "" }, { "id": "DOC_084", "type": "doctrinal_association", "category": "soteriology", "difficulty": "easy", "input": "For God so loved the world, that he gave his only Son, that whoever [MASK] in him should not perish but have eternal life.", "targets": [ "believes" ], "foils": [], "acceptable_alternatives": [ "trusts" ], "failure_examples": [ "lives", "hopes", "works", "prays", "obeys" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "John 3:16", "reasoning": "Pisteu\u014d (believe/trust) is the primary soteriological verb in John's Gospel, appearing nearly 100 times. John 3:16 is the highest-frequency evangelical verse. A model failing to predict 'believes' here lacks even basic Johannine soteriological encoding. 'Works' or 'obeys' as top predictions would indicate a synergistic confusion foreign to John's faith-centered soteriology.", "surface_confounder": "" }, { "id": "DOC_085", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "Jesus said to them, 'I am the [MASK] of life; whoever comes to me shall not hunger.'", "targets": [ "bread" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "water", "light", "tree", "source", "way" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "John 6:35", "reasoning": "The Bread of Life discourse in John 6 follows the feeding of the five thousand. 'Bread' is the specific 'I am' predicate tied to hunger/eating imagery. 'Water' is a related Johannine metaphor but belongs to the Samaritan woman discourse (John 4). 
A model that correctly selects 'bread' over 'water' or 'light' has learned the contextual pairing of specific 'I am' predicates with their associated imagery.", "surface_confounder": "water" }, { "id": "DOC_086", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "Jesus said, 'I am the [MASK] of the world. Whoever follows me will not walk in darkness.'", "targets": [ "light" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "bread", "hope", "king", "lord", "master" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "John 8:12", "reasoning": "The light/darkness dualism is a major Johannine motif introduced in the prologue (1:4-5) and developed through the 'I am' statements. The co-occurrence of 'follows me' and 'not walk in darkness' should strongly activate 'light.' A model predicting 'bread' here has learned that John uses 'I am' + noun patterns but has not differentiated the specific predicates by their surrounding semantic fields.", "surface_confounder": "way" }, { "id": "DOC_087", "type": "doctrinal_association", "category": "christology", "difficulty": "hard", "input": "I am the good [MASK]. The good one lays down his life for the sheep.", "targets": [ "shepherd" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "master", "king", "lord", "servant", "leader" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "John 10:11", "reasoning": "The Good Shepherd discourse in John 10 draws on OT shepherd imagery (Ezekiel 34, Psalm 23) while making a unique christological claim. The specific pairing of 'lays down his life' with shepherd identity fuses sacrificial atonement with pastoral care. 
A model predicting generic authority terms like 'master' or 'king' has missed the pastoral-sacrificial fusion unique to this 'I am' saying.", "surface_confounder": "master" }, { "id": "DOC_088", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "Jesus said to her, 'I am the [MASK] and the life. Whoever believes in me, though he die, yet shall he live.'", "targets": [ "resurrection" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "truth", "way", "light", "hope", "bread" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "John 11:25", "reasoning": "Spoken to Martha before the raising of Lazarus, this 'I am' statement pairs resurrection with life in the context of physical death. The narrative setting of Lazarus's tomb and Martha's eschatological confession (11:24) make 'resurrection' the theologically precise term. A model predicting 'way' or 'truth' has confused this with John 14:6, revealing failure to disambiguate distinct 'I am' statements by narrative context.", "surface_confounder": "way" }, { "id": "DOC_089", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "hard", "input": "But the [MASK], the Holy Spirit, whom the Father will send in my name, he will teach you all things.", "targets": [ "helper" ], "foils": [], "acceptable_alternatives": [ "advocate", "counselor", "comforter" ], "failure_examples": [ "prophet", "angel", "servant", "son", "priest" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "John 14:26", "reasoning": "The Paraclete (parakl\u0113tos) is John's distinctive title for the Holy Spirit, rendered variously as Helper, Advocate, Counselor, or Comforter across English translations. All four renderings (the target 'helper' plus the three acceptable alternatives) map to this single Greek term. A model predicting 'prophet' or 'angel' has failed to learn the Johannine Paraclete pneumatology that is absent from the Synoptics. 
The wide acceptable set makes this test translation-robust.", "surface_confounder": "" }, { "id": "DOC_090", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "I am the true [MASK], and my Father is the vinedresser.", "targets": [ "vine" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "shepherd", "bread", "light", "tree", "son" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "John 15:1", "reasoning": "The vine and branches allegory in John 15 is the final 'I am' statement with a predicate nominative. 'Vinedresser' in the same sentence provides the agricultural semantic frame that should activate 'vine' specifically. A model predicting other 'I am' predicates like 'shepherd' or 'bread' has generalized the pattern without learning the specific semantic associations. 'Tree' is botanically adjacent but misses the viticulture vocabulary.", "surface_confounder": "tree" }, { "id": "DOC_091", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "hard", "input": "When the Spirit of [MASK] comes, he will guide you into all the truth.", "targets": [ "truth" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "life", "love", "power", "grace", "wisdom" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "John 16:13", "reasoning": "John uniquely titles the Holy Spirit 'the Spirit of truth' (14:17, 15:26, 16:13), linking pneumatology to the truth motif that pervades the Gospel. The co-occurrence of 'Spirit of ___' with 'guide you into all the truth' should reinforce the specific Johannine designation. 
A model predicting Pauline titles like 'life' or 'power' has not differentiated Johannine from Pauline pneumatological vocabulary.", "surface_confounder": "life" }, { "id": "DOC_092", "type": "doctrinal_association", "category": "soteriology", "difficulty": "easy", "input": "In John's Gospel, eternal [MASK] is the gift God gives to all who believe in his Son.", "targets": [ "life" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "rest", "peace", "glory", "joy", "hope" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "John 3:16", "reasoning": "Eternal life (z\u014d\u0113 ai\u014dnios) is John's dominant soteriological concept, appearing over 15 times in the Gospel. Unlike the Synoptics' emphasis on 'kingdom,' John foregrounds life as the present possession of believers. A model failing to complete 'eternal ___' with 'life' in a Johannine context has not internalized the most basic vocabulary of the Fourth Gospel.", "surface_confounder": "" }, { "id": "DOC_093", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "hard", "input": "God is [MASK], and those who worship him must worship in spirit and truth.", "targets": [ "spirit" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "love", "holy", "good", "light", "one" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "John 4:24", "reasoning": "Jesus's declaration to the Samaritan woman that 'God is spirit' is one of only three 'God is ___' predications in the Johannine corpus (alongside 'God is light' in 1 John 1:5 and 'God is love' in 1 John 4:8). The surrounding clause 'worship in spirit and truth' provides a strong contextual signal. 
A model predicting 'love' has conflated the Johannine epistles with the Gospel, while 'holy' reflects generic theological language rather than John's specific ontological claim.", "surface_confounder": "love" }, { "id": "DOC_094", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "hard", "input": "A new [MASK] I give to you, that you love one another: just as I have loved you.", "targets": [ "commandment" ], "foils": [], "acceptable_alternatives": [ "command" ], "failure_examples": [ "law", "covenant", "promise", "teaching", "gift" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "John 13:34", "reasoning": "The 'new commandment' (entol\u0113 kain\u0113) is the ethical center of the farewell discourse. 'Commandment' is more precise than 'command' because it echoes the Decalogue context while marking the distinctiveness ('new') of Jesus's love ethic. A model predicting 'covenant' has imported Jeremiah 31 / Hebrews vocabulary, while 'law' misses the deliberate contrast Jesus draws between Mosaic law and his personal love mandate.", "surface_confounder": "covenant" }, { "id": "CAN_053", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "For God so loved the world, that he gave his only [MASK], that whoever believes in him should not perish but have eternal life.", "targets": [ "son" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "prophet", "servant", "angel", "word", "spirit" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "John 3:16", "reasoning": "John 3:16 is the highest-frequency verse in evangelical training corpora. 'Son' is the unambiguous canonical term. 
Failure to predict it in top-10 indicates the model has not internalized even the most basic Johannine texts.", "surface_confounder": "" }, { "id": "CAN_054", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "In the beginning was the Word, and the Word was with [MASK], and the Word was God.", "targets": [ "god" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "man", "them", "us", "christ", "jesus" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "John 1:1", "reasoning": "The twofold occurrence of 'God' in John 1:1 establishes the Logos's divine identity. The phrase 'the Word was with ___' requires 'God' to establish the relational yet distinct personhood within the Godhead. This is a baseline canonical recall test for the most theologically dense verse in the Gospel.", "surface_confounder": "" }, { "id": "CAN_055", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Jesus [MASK].", "targets": [ "wept" ], "foils": [], "acceptable_alternatives": [ "cried" ], "failure_examples": [ "spoke", "prayed", "smiled", "slept", "left" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "John 11:35", "reasoning": "John 11:35 is the shortest verse in the Bible and among the most widely memorized. This tests whether the model can complete even a two-word canonical phrase. The difficulty is medium rather than easy because the input is extremely short, providing minimal context for prediction. Note: this test case masks the verb rather than a noun, testing a different dimension of canonical recall.", "surface_confounder": "" }, { "id": "CAN_056", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "I am the way, and the [MASK], and the life. 
No one comes to the Father except through me.", "targets": [ "truth" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "light", "door", "bread", "word", "hope" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "John 14:6", "reasoning": "The triad 'way, truth, life' is a fixed canonical phrase from John 14:6. With 'way' and 'life' present, the model must recall that 'truth' completes the sequence. A model predicting other Johannine 'I am' predicates like 'light' or 'door' has learned the general pattern but not the specific triadic structure of this verse.", "surface_confounder": "light" }, { "id": "CAN_057", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "The [MASK] was in the world, and the world was made through him, yet the world did not know him.", "targets": [ "light" ], "foils": [], "acceptable_alternatives": [ "word" ], "failure_examples": [ "spirit", "lord", "son", "king", "prophet" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "John 1:10", "reasoning": "John 1:9-10 transitions from 'the true light' to the world's rejection. Both 'light' (ESV/NASB continuing from v.9) and 'Word' (as the antecedent subject of the prologue) are defensible readings. The world-rejection motif and the 'made through him' clause are distinctively Johannine prologue vocabulary. 
A model predicting 'spirit' or 'lord' has lost the prologue's semantic thread.", "surface_confounder": "" }, { "id": "CAN_058", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "Behold, the [MASK] of God, who takes away the sin of the world!", "targets": [ "lamb" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "son", "servant", "spirit", "glory", "hand" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "John 1:29", "reasoning": "John the Baptist's declaration 'the Lamb of God' is unique to the Fourth Gospel and establishes sacrificial atonement imagery from the outset. 'Son' is a strong confounder because 'Son of God' is a far more frequent christological title, but the specific canonical phrase here requires 'Lamb.' A model that has learned the John 1 narrative sequence will distinguish the Baptist's testimony vocabulary from generic christological titles.", "surface_confounder": "son" }, { "id": "CAN_059", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "Pilate said to them, '[MASK] your King!' But they cried out, 'Away with him, crucify him!'", "targets": [ "behold" ], "foils": [], "acceptable_alternatives": [ "here" ], "failure_examples": [ "worship", "crown", "see", "honor", "take" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "John 19:14-15", "reasoning": "Pilate's 'Behold your King' echoes his earlier 'Behold the man' (Ecce Homo, 19:5). The verb 'behold' (ide/idou) is the canonical term in English translations. This tests whether the model has learned the specific Johannine passion narrative vocabulary. 
'See' is semantically close but not the canonical rendering, making this a precision test.", "surface_confounder": "see" }, { "id": "CAN_060", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "Thomas answered him, 'My [MASK] and my God!'", "targets": [ "lord" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "king", "master", "savior", "father", "friend" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "John 20:28", "reasoning": "Thomas's confession 'My Lord and my God' is the christological climax of the Fourth Gospel, forming an inclusio with the prologue's 'the Word was God.' The dual title 'Lord and God' is a high-frequency evangelical phrase. A model predicting 'king' or 'master' has captured authority vocabulary but missed the specific canonical confession that anchors John's entire christological argument.", "surface_confounder": "" }, { "id": "CAN_061", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "It is [MASK]. And he bowed his head and gave up his spirit.", "targets": [ "finished" ], "foils": [], "acceptable_alternatives": [ "done" ], "failure_examples": [ "over", "complete", "enough", "time", "written" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "John 19:30", "reasoning": "Jesus's final word from the cross in John \u2014 tetelestai ('It is finished') \u2014 is a theologically loaded declaration of completed atonement. The Greek perfect tense signals a once-for-all accomplished work. 'Done' is an acceptable alternative but lacks the theological weight. 
A model predicting 'over' or 'complete' has captured the semantic domain but missed the specific canonical term that carries sacrificial-completion theology.", "surface_confounder": "over" }, { "id": "CON_069", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "The Word was with God, and the Word was [MASK].", "targets": [ "god" ], "foils": [ "divine" ], "acceptable_alternatives": [], "failure_examples": [ "holy", "good", "spirit", "eternal", "powerful" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "John 1:1", "reasoning": "The foil 'divine' represents the Arian/Jehovah's Witness dilution of John 1:1c \u2014 treating theos as a qualitative adjective ('the Word was divine/a god') rather than identifying the Logos as fully God. This is the most contested clause in Johannine christology. A model ranking 'god' above 'divine' demonstrates it has learned the orthodox identity claim rather than the subordinationist adjectival reading.", "surface_confounder": "divine" }, { "id": "CON_070", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "Whoever [MASK] in the Son has eternal life; whoever does not obey the Son shall not see life.", "targets": [ "believes" ], "foils": [ "obeys" ], "acceptable_alternatives": [ "trusts" ], "failure_examples": [ "follows", "serves", "knows", "loves", "seeks" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "John 3:36", "reasoning": "John 3:36 deliberately contrasts 'believes' (pisteu\u014dn) in the first clause with 'does not obey' (apeith\u014dn) in the second. The foil 'obeys' tests whether the model collapses the believe/obey distinction by using the same verb for both clauses. Johannine soteriology grounds salvation in belief as the primary instrument, not obedience \u2014 though disobedience characterizes unbelief. 
A correct prediction shows the model has learned the asymmetric pairing.", "surface_confounder": "obeys" }, { "id": "CON_071", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "No one has ever seen God; the only [MASK] Son, who is at the Father's side, he has made him known.", "targets": [ "begotten" ], "foils": [ "created" ], "acceptable_alternatives": [ "born" ], "failure_examples": [ "chosen", "holy", "beloved", "divine", "righteous" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "John 1:18", "reasoning": "The foil 'created' represents the Arian heresy that the Son is the first and greatest creature rather than eternally generated from the Father. 'Begotten' (monogen\u0113s) in Nicene theology signals eternal generation \u2014 same substance, not made. A model ranking 'begotten' above 'created' demonstrates it has absorbed the Nicene-Johannine christological distinction rather than the Arian alternative.", "surface_confounder": "created" }, { "id": "CON_072", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "Jesus answered, 'Truly, truly, I say to you, unless one is born of water and the [MASK], he cannot enter the kingdom of God.'", "targets": [ "spirit" ], "foils": [ "flesh" ], "acceptable_alternatives": [], "failure_examples": [ "law", "word", "blood", "fire", "truth" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "John 3:5", "reasoning": "The foil 'flesh' represents the natural/unregenerate state that Jesus contrasts with Spirit-birth in John 3:6 ('that which is born of the flesh is flesh'). The water-and-Spirit pairing describes regeneration, while water-and-flesh would describe merely natural birth. 
A model ranking 'spirit' above 'flesh' has learned the regeneration schema rather than collapsing the flesh/Spirit antithesis that drives the Nicodemus discourse.", "surface_confounder": "flesh" }, { "id": "CON_073", "type": "contrastive_theology", "category": "christology", "difficulty": "medium", "input": "I and the [MASK] are one.", "targets": [ "father" ], "foils": [ "spirit" ], "acceptable_alternatives": [], "failure_examples": [ "world", "church", "law", "temple", "truth" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "John 10:30", "reasoning": "The foil 'spirit' tests whether the model confuses the Father-Son unity claim of John 10:30 with a broader trinitarian formula. Jesus's statement 'I and the Father are one' is a dyadic (not triadic) unity claim that provoked charges of blasphemy. A model predicting 'spirit' has imported a trinitarian reflex that misplaces the specific relational claim being made in this narrative context.", "surface_confounder": "spirit" }, { "id": "CON_074", "type": "contrastive_theology", "category": "pneumatology", "difficulty": "hard", "input": "But when the [MASK] of truth comes, he will guide you into all the truth, for he will not speak on his own authority.", "targets": [ "spirit" ], "foils": [ "angel" ], "acceptable_alternatives": [ "helper" ], "failure_examples": [ "prophet", "teacher", "messenger", "servant", "son" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "John 16:13", "reasoning": "The foil 'angel' tests whether the model reduces the Paraclete to a created messenger rather than recognizing the Third Person of the Trinity. The phrase 'Spirit of truth' is a fixed Johannine title (14:17, 15:26, 16:13). 
A model ranking 'spirit' above 'angel' demonstrates it has learned that John's farewell discourse promises a divine person, not a creature.", "surface_confounder": "angel" }, { "id": "CON_075", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "For by [MASK] you have been saved through faith. And this is not your own doing; it is the gift of God.", "targets": [ "grace" ], "foils": [ "works" ], "acceptable_alternatives": [], "failure_examples": [ "law", "merit", "effort", "obedience", "sacrifice" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ephesians 2:8", "reasoning": "Although the input quotes Ephesians 2:8 rather than a Johannine text, it tests a soteriological principle that is also core to John: salvation as divine gift (cf. John 3:16, 4:10, 6:44). The foil 'works' represents the Pelagian/semi-Pelagian error of human merit contributing to salvation. In John's soteriology, coming to Christ is itself enabled by the Father's drawing (6:44). A model ranking 'grace' above 'works' has internalized the gift-character of Johannine salvation.", "surface_confounder": "works" }, { "id": "CON_076", "type": "contrastive_theology", "category": "christology", "difficulty": "medium", "input": "The true [MASK] that gives light to everyone was coming into the world.", "targets": [ "light" ], "foils": [ "law" ], "acceptable_alternatives": [], "failure_examples": [ "prophet", "king", "teacher", "word", "god" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "John 1:9", "reasoning": "The foil 'law' tests whether the model substitutes Torah-centered theology for John's light christology. John 1:17 explicitly contrasts law (given through Moses) with grace and truth (through Jesus Christ). The prologue identifies Christ as 'the true light,' not 'the true law.' 
A model ranking 'light' above 'law' has learned John's christological framework over a Mosaic one.", "surface_confounder": "law" }, { "id": "CON_077", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "No one can come to me unless the [MASK] who sent me draws him.", "targets": [ "father" ], "foils": [ "man" ], "acceptable_alternatives": [], "failure_examples": [ "spirit", "prophet", "law", "scripture", "world" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "John 6:44", "reasoning": "The foil 'man' tests whether the model replaces divine initiative in salvation with human agency. John 6:44 is a key text for monergistic soteriology \u2014 the Father's drawing is the necessary precondition for coming to Christ. A model ranking 'father' above 'man' demonstrates it has learned that in Johannine theology, salvation originates in divine action, not human decision.", "surface_confounder": "man" }, { "id": "CON_078", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "Before Abraham was, I [MASK].", "targets": [ "am" ], "foils": [ "was" ], "acceptable_alternatives": [], "failure_examples": [ "existed", "lived", "came", "stood", "appeared" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "John 8:58", "reasoning": "This is the most compressed christological claim in John. 'I am' (eg\u014d eimi) is a present-tense divine self-identification echoing Exodus 3:14, while the foil 'was' (past tense) would reduce the claim to mere pre-existence without the divine-name identification. The Jews' attempt to stone Jesus (8:59) confirms they understood the divine claim. 
A model ranking 'am' above 'was' has learned the absolute 'I am' formula's theological weight beyond simple temporal pre-existence.", "surface_confounder": "was" }, { "id": "CON_079", "type": "contrastive_theology", "category": "pneumatology", "difficulty": "medium", "input": "The wind blows where it wishes, and you hear its sound, but you do not know where it comes from or where it goes. So it is with everyone who is born of the [MASK].", "targets": [ "spirit" ], "foils": [ "water" ], "acceptable_alternatives": [], "failure_examples": [ "wind", "flesh", "world", "law", "earth" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "John 3:8", "reasoning": "The foil 'water' tests whether the model conflates the two elements mentioned in John 3:5 ('born of water and the Spirit'). The wind/Spirit analogy in 3:8 specifically illustrates Spirit-birth's mysterious sovereignty \u2014 the Greek pneuma means both 'wind' and 'spirit.' A model ranking 'spirit' above 'water' has tracked the analogy's referent correctly through the Nicodemus discourse.", "surface_confounder": "water" }, { "id": "CON_080", "type": "contrastive_theology", "category": "eschatology", "difficulty": "hard", "input": "I am the resurrection and the [MASK]. Whoever believes in me, though he die, yet shall he live.", "targets": [ "life" ], "foils": [ "death" ], "acceptable_alternatives": [], "failure_examples": [ "way", "truth", "hope", "light", "end" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "John 11:25", "reasoning": "The foil 'death' represents a semantic inversion \u2014 'resurrection and the death' would be an oxymoron that inverts the Johannine realized eschatology. Jesus claims to be the embodiment of both resurrection and ongoing life, not resurrection from death. A model ranking 'life' above 'death' has learned that John pairs resurrection with its positive result (life), not its precondition (death). 
This tests whether the model's eschatological representations correctly encode the direction of Johannine soteriology.", "surface_confounder": "death" }, { "id": "DOC_095", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "Packer argues that the highest privilege of the gospel is not forgiveness or peace, but [MASK] into God's family as his children.", "targets": [ "adoption" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "entry", "acceptance", "invitation", "welcome", "baptism" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "", "reasoning": "Adoption is the climactic doctrine in Knowing God. Packer devotes an entire chapter to arguing that adoption \u2014 not justification \u2014 is the highest privilege of the Christian life. A model that predicts 'acceptance' or 'entry' has learned generic relational language but not Packer's precise soteriological vocabulary. Predicting 'adoption' demonstrates internalization of the Reformed ordo salutis as Packer frames it.", "surface_confounder": "acceptance" }, { "id": "DOC_096", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "hard", "input": "The difference between knowing about God and truly [MASK] God is the central distinction Packer draws in his theology of personal relationship with the Creator.", "targets": [ "knowing" ], "foils": [], "acceptable_alternatives": [ "encountering" ], "failure_examples": [ "worshiping", "loving", "serving", "following", "obeying" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "", "reasoning": "The entire thesis of Knowing God rests on the distinction between propositional knowledge about God and relational knowledge of God. The verb 'knowing' must appear to complete the contrast with 'knowing about.' A model predicting 'loving' or 'serving' has collapsed relational theology into ethics rather than capturing Packer's epistemological distinction. 
This is hard because the MASK position invites many relational verbs.", "surface_confounder": "loving" }, { "id": "DOC_097", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "medium", "input": "God's [MASK] means that he does not change in his character, purposes, or promises \u2014 what he was, he is and always will be.", "targets": [ "immutability" ], "foils": [], "acceptable_alternatives": [ "unchangeableness" ], "failure_examples": [ "power", "glory", "eternity", "love", "holiness" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "", "reasoning": "Packer devotes careful attention to God's immutability as the ground of all confidence in his promises. The surrounding context \u2014 'does not change,' 'character, purposes, or promises' \u2014 narrows to the technical attribute term. A model predicting 'eternity' has confused temporal endurance with ontological constancy. Predicting 'immutability' shows the model links the explanatory gloss to the precise systematic-theology term.", "surface_confounder": "eternity" }, { "id": "DOC_098", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "medium", "input": "Packer insists that God's [MASK] is not an embarrassing Old Testament relic but a necessary expression of his holy love for his covenant people.", "targets": [ "wrath" ], "foils": [], "acceptable_alternatives": [ "jealousy" ], "failure_examples": [ "anger", "power", "judgment", "violence", "vengeance" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "", "reasoning": "Packer treats divine wrath as a doctrine modern Christians are tempted to discard, arguing instead that it flows necessarily from God's holiness and love. The phrase 'embarrassing Old Testament relic' signals the cultural discomfort Packer addresses. 'Jealousy' is an acceptable alternative since Packer makes a similar rehabilitative argument for that attribute. 
'Anger' as a top prediction would show the model has a colloquial rather than theological register. 'Violence' would indicate a critical-theory framing foreign to Packer's Reformed theology.", "surface_confounder": "anger" }, { "id": "CAN_062", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "Be still, and [MASK] that I am God; I will be exalted among the nations.", "targets": [ "know" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "see", "hear", "believe", "remember", "trust" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Psalm 46:10", "reasoning": "Psalm 46:10 is a cornerstone text for Packer's thesis that the purpose of theology is personal knowledge of God. The imperative 'know' completes the most recognizable form of this verse. A model predicting 'see' or 'believe' has learned devotional register but not the specific canonical wording. This is easy because the verse is extremely high-frequency in Christian corpora.", "surface_confounder": "" }, { "id": "CAN_063", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "And this is eternal life, that they [MASK] you, the only true God, and Jesus Christ whom you have sent.", "targets": [ "know" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "worship", "love", "serve", "follow", "obey" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "John 17:3", "reasoning": "John 17:3 is arguably the single most important proof text for Packer's entire project \u2014 eternal life defined as knowing God. The verb 'know' must appear here; substituting 'worship' or 'love' would collapse Packer's epistemological framing into generic piety. 
A correct prediction shows the model has encoded the Johannine definition of eternal life as relational knowledge.", "surface_confounder": "worship" }, { "id": "CAN_064", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "For I am the LORD, I do not [MASK]; therefore you, O sons of Jacob, are not consumed.", "targets": [ "change" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "fail", "sleep", "forget", "waver", "relent" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Malachi 3:6", "reasoning": "Malachi 3:6 is the key proof text Packer marshals for divine immutability. The logical structure \u2014 God's unchangeableness is the reason Israel survives \u2014 requires 'change' as the negated verb. 'Fail' is a plausible generic alternative but loses the precise attribute Packer is establishing. Correct prediction demonstrates the model links the covenantal consequence ('not consumed') to the ontological ground ('do not change').", "surface_confounder": "fail" }, { "id": "CON_081", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "Packer argues that believers are not merely pardoned criminals but [MASK] children of the living God.", "targets": [ "adopted" ], "foils": [ "forgiven" ], "acceptable_alternatives": [], "failure_examples": [ "good", "humble", "faithful", "obedient", "worthy" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "", "reasoning": "This tests whether the model has learned Packer's crucial distinction between justification (pardon) and adoption (family status). The foil 'forgiven' stays in the soteriological domain but collapses back into the forensic category the sentence explicitly moves beyond ('not merely pardoned'). A model ranking 'forgiven' above 'adopted' has failed to encode the contrastive structure of Packer's argument. 
This is hard because 'forgiven' and 'pardoned' are near-synonyms, making the foil very attractive.", "surface_confounder": "forgiven" }, { "id": "CON_082", "type": "contrastive_theology", "category": "theology_proper", "difficulty": "medium", "input": "According to Packer, our study of God's nature must lead not to academic pride but to humble [MASK] before his majesty.", "targets": [ "worship" ], "foils": [ "knowledge" ], "acceptable_alternatives": [ "adoration" ], "failure_examples": [ "study", "analysis", "reflection", "thought", "learning" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "", "reasoning": "Packer repeatedly insists that theology must terminate in doxology, not intellectual self-congratulation. The foil 'knowledge' is precisely what Packer warns against when it becomes an end in itself \u2014 the difference between knowing about God and knowing God. If the model ranks 'knowledge' above 'worship,' it has learned the academic register without capturing Packer's normative turn toward devotional response. 'Adoration' is acceptable as a near-synonym for worship in this doxological context.", "surface_confounder": "knowledge" }, { "id": "CON_083", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "In the incarnation, the Son of God did not cease to be God but [MASK] a full human nature without surrendering his divine attributes.", "targets": [ "assumed" ], "foils": [ "became" ], "acceptable_alternatives": [ "took" ], "failure_examples": [ "lost", "abandoned", "replaced", "transformed", "created" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "", "reasoning": "Packer affirms Chalcedonian incarnational Christology: the Son assumed (took on) human nature without conversion or confusion of natures. The foil 'became' risks implying a transformation or replacement of the divine nature \u2014 a kenotic error Packer would reject. The technical patristic verb is 'assumed.' 
A model ranking 'became' above 'assumed' has learned casual incarnational language without the precision Packer and the Chalcedonian tradition require. 'Took' is acceptable as a less technical variant of the same idea.", "surface_confounder": "became" }, { "id": "DOC_099", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "The Son of Man came to seek and to [MASK] the lost.", "targets": [ "save" ], "foils": [], "acceptable_alternatives": [ "rescue" ], "failure_examples": [ "judge", "punish", "find", "condemn", "teach" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Luke 19:10", "reasoning": "Luke 19:10 is the thesis statement of Luke's Gospel \u2014 Jesus' mission is soteriological rescue. 'Save' must beat 'find' despite the search metaphor in 'seek.' A model predicting 'find' has learned the seek-and-find surface pattern (familiar from the lost-and-found parables of Luke 15) but not the salvific purpose. 'Judge' or 'condemn' would indicate inverted doctrinal polarity for Luke's portrait of Jesus.", "surface_confounder": "find" }, { "id": "DOC_100", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "The angel said to Mary, 'The Holy Spirit will come upon you, and the power of the Most High will [MASK] you.'", "targets": [ "overshadow" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "destroy", "bless", "fill", "empower", "consume" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Luke 1:35", "reasoning": "The annunciation uses 'overshadow' as a theologically loaded term evoking the Shekinah glory cloud over the tabernacle. 'Fill' or 'empower' reflect generic pneumatology rather than the incarnational theology of this passage. 
Predicting 'overshadow' demonstrates the model has learned the specific vocabulary of the virginal conception narrative.", "surface_confounder": "empower" }, { "id": "DOC_101", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "easy", "input": "Jesus returned to Galilee in the [MASK] of the Spirit, and news about him spread throughout the region.", "targets": [ "power" ], "foils": [], "acceptable_alternatives": [ "strength" ], "failure_examples": [ "name", "absence", "shadow", "light", "presence" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Luke 4:14", "reasoning": "Luke emphasizes Spirit-empowerment as the engine of Jesus' ministry. 'Power of the Spirit' is a high-frequency Lukan phrase. Predicting 'name' would indicate confusion with Acts-style apostolic formulas. 'Presence' is theologically adjacent but not the canonical term Luke uses to describe Spirit-driven ministry.", "surface_confounder": "presence" }, { "id": "DOC_102", "type": "doctrinal_association", "category": "soteriology", "difficulty": "hard", "input": "But the tax collector stood at a distance and would not even lift his eyes to heaven, but beat his breast saying, 'God, be [MASK] to me, a sinner.'", "targets": [ "merciful" ], "foils": [], "acceptable_alternatives": [ "gracious" ], "failure_examples": [ "faithful", "just", "kind", "true", "near" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Luke 18:13", "reasoning": "The Greek hilastheti carries propitiation overtones \u2014 the tax collector asks God to be 'merciful' in the atoning sense. 'Kind' or 'faithful' are generic divine attributes lacking soteriological precision. 
Predicting 'merciful' at top-3 demonstrates the model has internalized the penitential vocabulary of Lukan soteriology and the parable's justification-by-humility theme.", "surface_confounder": "kind" }, { "id": "DOC_103", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "medium", "input": "My soul [MASK] the Lord, and my spirit rejoices in God my Savior.", "targets": [ "magnifies" ], "foils": [], "acceptable_alternatives": [ "glorifies", "praises", "exalts" ], "failure_examples": [ "questions", "fears", "needs", "forgets", "denies" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Luke 1:46-47", "reasoning": "The Magnificat opens with 'magnifies,' a term of doxological intensification. The parallel structure with 'rejoices' narrows the semantic field to worship vocabulary. A model predicting 'needs' or 'fears' has failed to learn that Lukan canticles are doxological, not petitionary. 'Praises' is acceptable but 'magnifies' is the canonical ESV/NKJV term (the NASB reads 'exalts' and the NIV 'glorifies').", "surface_confounder": "" }, { "id": "DOC_104", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "Today [MASK] has come to this house, since he also is a son of Abraham.", "targets": [ "salvation" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "judgment", "punishment", "peace", "hope", "blessing" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Luke 19:9", "reasoning": "In the Zacchaeus narrative, Jesus declares 'salvation has come to this house,' making salvation a present-tense event tied to Jesus' physical arrival. 'Blessing' is too generic; 'peace' misses the Lukan emphasis on realized soteriology. 
The model must associate Jesus' personal arrival with the coming of salvation itself.", "surface_confounder": "blessing" }, { "id": "DOC_105", "type": "doctrinal_association", "category": "christology", "difficulty": "easy", "input": "For unto you is born this day in the city of David a [MASK], who is Christ the Lord.", "targets": [ "savior" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "prophet", "teacher", "warrior", "priest", "judge" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Luke 2:11", "reasoning": "The angelic birth announcement assigns three titles: Savior, Christ, Lord. The syntactic slot before 'who is Christ the Lord' requires the first title. 'Prophet' reflects an inadequate Christology that Luke's narrative explicitly transcends. Top-10 retrieval of 'savior' is expected for any model with basic nativity-text exposure.", "surface_confounder": "king" }, { "id": "DOC_106", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "hard", "input": "The older brother was angry and refused to go in, resenting the father's [MASK] toward the prodigal who had squandered his inheritance.", "targets": [ "grace" ], "foils": [], "acceptable_alternatives": [ "mercy", "compassion" ], "failure_examples": [ "anger", "justice", "silence", "wealth", "punishment" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Luke 15:28", "reasoning": "The prodigal son parable contrasts the father's grace with the elder brother's self-righteousness. The sentence frames the father's response as the object of resentment \u2014 'justice' or 'punishment' would invert the parable's point. 
Predicting 'grace' at top-3 shows the model grasps that the elder brother's complaint is against unmerited favor, not against fairness.", "surface_confounder": "justice" }, { "id": "DOC_107", "type": "doctrinal_association", "category": "eschatology", "difficulty": "medium", "input": "The kingdom of God is not coming in ways that can be observed, for the kingdom of God is in your [MASK].", "targets": [ "midst" ], "foils": [], "acceptable_alternatives": [ "hearts" ], "failure_examples": [ "future", "dreams", "absence", "imagination", "past" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Luke 17:20-21", "reasoning": "This passage is central to realized eschatology debates. 'Midst' (ESV/NASB) reflects the inaugurated-kingdom reading; 'hearts' reflects an internalized-kingdom reading. Both are acceptable translations of entos hymon. 'Future' would indicate the model defaults to futurist eschatology and misses the 'already' dimension of Jesus' kingdom teaching in Luke.", "surface_confounder": "future" }, { "id": "DOC_108", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "medium", "input": "The Spirit of the Lord is upon me, because he has [MASK] me to proclaim good news to the poor.", "targets": [ "anointed" ], "foils": [], "acceptable_alternatives": [ "appointed" ], "failure_examples": [ "forced", "cursed", "abandoned", "tested", "forbidden" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Luke 4:18", "reasoning": "Jesus' Nazareth sermon quotes Isaiah 61 and applies it to himself. 'Anointed' is the messianic-pneumatological term linking Spirit-empowerment to the Christ (anointed one) title. 'Appointed' is close but lacks the anointing-Christology connection. 
'Forced' or 'cursed' would indicate severe contextual failure.", "surface_confounder": "sent" }, { "id": "DOC_109", "type": "doctrinal_association", "category": "soteriology", "difficulty": "hard", "input": "I tell you, this man went down to his house [MASK] rather than the other, for everyone who exalts himself will be humbled.", "targets": [ "justified" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "condemned", "punished", "ashamed", "proud", "forgiven" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Luke 18:14", "reasoning": "The parable of the Pharisee and tax collector concludes with forensic justification language. 'Forgiven' is soteriologically adjacent but misses the Pauline-compatible forensic declaration Luke uses. 'Justified' at top-3 shows the model has learned the declarative legal sense of the term rather than defaulting to a softer synonym. This is the only Lukan parable using dikaioo.", "surface_confounder": "forgiven" }, { "id": "DOC_110", "type": "doctrinal_association", "category": "christology", "difficulty": "easy", "input": "Jesus said to him, 'Truly, I say to you, today you will be with me in [MASK].'", "targets": [ "paradise" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "hell", "darkness", "prison", "judgment", "purgatory" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Luke 23:43", "reasoning": "Jesus' promise to the penitent thief is among the most recognized crucifixion sayings. 'Paradise' is the sole canonical term. 'Purgatory' would indicate Catholic-Protestant confusion in the model's theological representations. 'Hell' or 'judgment' would signal complete soteriological inversion. 
Top-10 retrieval is expected for basic biblical literacy.", "surface_confounder": "" }, { "id": "DOC_111", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "medium", "input": "He has scattered those who are proud in the thoughts of their hearts; he has brought down rulers from their [MASK].", "targets": [ "thrones" ], "foils": [], "acceptable_alternatives": [ "seats" ], "failure_examples": [ "homes", "beds", "camps", "ships", "gardens" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Luke 1:51-52", "reasoning": "The Magnificat describes God's sovereign reversal of power structures. 'Thrones' is the canonical term representing established political authority. The model must associate 'brought down rulers' with the symbol of royal power. 'Homes' or other generic nouns would indicate the model lacks the political-theology vocabulary of the Lukan canticles.", "surface_confounder": "" }, { "id": "DOC_112", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "hard", "input": "And he took bread, gave thanks and broke it, and gave it to them, saying, 'This is my [MASK], which is given for you.'", "targets": [ "body" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "blood", "spirit", "soul", "word", "gift" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Luke 22:19", "reasoning": "The Last Supper institution narrative pairs bread with body and cup with blood. The syntactic frame after 'took bread...broke it' demands 'body' \u2014 not 'blood,' which belongs to the cup saying. A model confusing the bread-body and cup-blood pairings reveals it has not learned the internal structure of the eucharistic formula. 
Top-3 is required because 'blood' is a strong contextual competitor.", "surface_confounder": "blood" }, { "id": "CAN_065", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "And she gave birth to her firstborn [MASK] and wrapped him in swaddling cloths and laid him in a manger.", "targets": [ "son" ], "foils": [], "acceptable_alternatives": [ "child" ], "failure_examples": [ "daughter", "servant", "lamb", "prophet", "king" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Luke 2:7", "reasoning": "The nativity verse is among the most recited in the Christian canon. 'Son' is the expected canonical term, with 'child' as an acceptable variant. 'Lamb' would indicate bleed-over from Johannine symbolism. Top-10 failure would signal the model has no internalized representation of the Lukan birth narrative.", "surface_confounder": "" }, { "id": "CAN_066", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Blessed is she who believed that there would be a [MASK] of what was spoken to her from the Lord.", "targets": [ "fulfillment" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "denial", "end", "reversal", "delay", "rejection" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Luke 1:45", "reasoning": "Elizabeth's blessing of Mary uses 'fulfillment' (teleiosis) \u2014 linking faith to the certainty of God's promises. 'Delay' or 'denial' would invert the passage's confident tone. 
Correct prediction at top-5 shows the model has internalized the promise-fulfillment schema that structures Luke's infancy narrative and connects Mary's faith to prophetic realization.", "surface_confounder": "end" }, { "id": "CAN_067", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Fear not, for behold, I bring you good [MASK] of great joy that will be for all the people.", "targets": [ "news" ], "foils": [], "acceptable_alternatives": [ "tidings" ], "failure_examples": [ "warnings", "laws", "judgment", "commands", "signs" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Luke 2:10", "reasoning": "The angelic announcement to the shepherds uses euangelizomai \u2014 'to bring good news' \u2014 which is Luke's signature verb. 'Tidings' is the KJV variant. 'Warnings' or 'judgment' would indicate the model associates angelic appearances with threat rather than gospel proclamation, missing Luke's characteristic joy motif.", "surface_confounder": "" }, { "id": "CAN_068", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "Glory to God in the highest, and on earth [MASK] among those with whom he is pleased.", "targets": [ "peace" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "war", "judgment", "fire", "death", "chaos" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Luke 2:14", "reasoning": "The Gloria in Excelsis is a liturgically embedded text with extremely high corpus frequency. 'Peace on earth' is culturally ubiquitous beyond even religious contexts. 
Failure at top-10 would indicate the model has not absorbed even the most basic nativity vocabulary.", "surface_confounder": "" }, { "id": "CAN_069", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "Lord, now you are letting your servant depart in [MASK], according to your word.", "targets": [ "peace" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "sorrow", "fear", "haste", "shame", "anger" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Luke 2:29", "reasoning": "The Nunc Dimittis is Simeon's departure prayer. 'Peace' is the canonical term (ESV/NASB). Despite 'peace' being a common word, the archaic phrasing 'depart in peace' requires the model to have specific exposure to Simeon's canticle. 'Sorrow' or 'fear' would indicate inverted emotional valence for what is a joyful recognition scene. Top-3 is demanded because the phrasing is less culturally ubiquitous than Luke 2:14.", "surface_confounder": "silence" }, { "id": "CAN_070", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "A certain man was going down from Jerusalem to Jericho, and he fell among [MASK], who stripped him and beat him.", "targets": [ "robbers" ], "foils": [], "acceptable_alternatives": [ "thieves" ], "failure_examples": [ "soldiers", "priests", "wolves", "demons", "pharisees" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Luke 10:30", "reasoning": "The Good Samaritan parable opens with robbery on the Jericho road. 'Robbers' (ESV) and 'thieves' (KJV) are both acceptable. 'Soldiers' or 'priests' would indicate the model is confusing the parable's characters \u2014 the priest and Levite pass by but are not the assailants. 
Correct recall shows the model has learned the narrative structure of this uniquely Lukan parable.", "surface_confounder": "soldiers" }, { "id": "CAN_071", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "And he arose and came to his father. But while he was still a long way off, his father saw him and felt [MASK], and ran and embraced him.", "targets": [ "compassion" ], "foils": [], "acceptable_alternatives": [ "pity" ], "failure_examples": [ "anger", "shame", "disgust", "fear", "nothing" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Luke 15:20", "reasoning": "The prodigal son's return pivots on the father's splanchnizomai \u2014 visceral compassion. 'Anger' would completely invert the parable's grace theology. 'Shame' reflects a cultural-honor reading alien to the passage's intent. Correct prediction of 'compassion' demonstrates the model has encoded the emotional vocabulary of Lukan parables of divine mercy.", "surface_confounder": "anger" }, { "id": "CAN_072", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "And Mary said, 'Behold, I am the servant of the Lord; let it be to me according to your [MASK].'", "targets": [ "word" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "will", "law", "power", "anger", "plan" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Luke 1:38", "reasoning": "Mary's fiat is a cornerstone of Lukan theology \u2014 submission to God's revealed word. 'Will' is theologically adjacent but not the canonical term; the passage emphasizes God's spoken word through the angel. 'Law' would indicate confusion between Mosaic and prophetic categories. 
Top-10 retrieval tests basic familiarity with the annunciation dialogue.", "surface_confounder": "will" }, { "id": "CAN_073", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "And Jesus said to him, 'Today salvation has come to this house, since he also is a son of [MASK].'", "targets": [ "abraham" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "david", "moses", "adam", "god", "man" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Luke 19:9", "reasoning": "Jesus calls Zacchaeus 'a son of Abraham,' invoking covenant membership despite his despised tax-collector status. 'David' is a strong confounder because of messianic associations, but the Abrahamic covenant \u2014 not Davidic kingship \u2014 is the operative category for inclusion of outcasts. Top-3 is required because 'David' and 'God' will compete strongly.", "surface_confounder": "david" }, { "id": "CON_084", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "The prodigal son returned home and was received with [MASK] by his father, not because he earned it but because of the father's love.", "targets": [ "grace" ], "foils": [ "justice" ], "acceptable_alternatives": [ "mercy", "compassion" ], "failure_examples": [ "punishment", "anger", "indifference", "scorn" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Luke 15:20-24", "reasoning": "The prodigal son parable is a grace-versus-merit narrative. The foil 'justice' is what the elder brother demands and what the prodigal himself expected. A model confusing grace and justice here reveals it has not learned the parable's central inversion: the father's response is explicitly not what is deserved. 
'Grace' beating 'justice' demonstrates proper soteriological encoding of Luke 15.", "surface_confounder": "justice" }, { "id": "CON_085", "type": "contrastive_theology", "category": "christology", "difficulty": "medium", "input": "Simeon declared that the child Jesus was set for the fall and rising of many in Israel, a sign that will be [MASK].", "targets": [ "opposed" ], "foils": [ "accepted" ], "acceptable_alternatives": [ "rejected" ], "failure_examples": [ "ignored", "forgotten", "praised", "hidden" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Luke 2:34", "reasoning": "Simeon's prophecy frames Jesus as a divisive sign \u2014 'opposed' (antilegomenon). The foil 'accepted' represents a triumphalist Christology that skips the suffering-servant motif. Luke's theology of the cross requires the model to associate the infant Jesus with future opposition, not universal acclaim. Correct ranking reveals learned awareness of the Lukan passion trajectory embedded already in the infancy narrative.", "surface_confounder": "accepted" }, { "id": "CON_086", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "The Pharisee stood and prayed about his own righteousness, but the tax collector asked for [MASK] and went home justified.", "targets": [ "mercy" ], "foils": [ "reward" ], "acceptable_alternatives": [ "grace", "pardon" ], "failure_examples": [ "money", "honor", "power", "fame" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Luke 18:10-14", "reasoning": "The Pharisee-and-tax-collector parable contrasts merit-based prayer with mercy-based prayer. The foil 'reward' captures the Pharisee's transactional theology. A model ranking 'reward' above 'mercy' has inverted the parable's soteriological lesson \u2014 justification comes through humble petition, not earned merit. 
The proximity of 'righteousness' in the sentence makes 'reward' a plausible surface completion.", "surface_confounder": "reward" }, { "id": "CON_087", "type": "contrastive_theology", "category": "christology", "difficulty": "medium", "input": "Jesus taught that the Son of Man must [MASK] and on the third day be raised.", "targets": [ "suffer" ], "foils": [ "reign" ], "acceptable_alternatives": [ "die" ], "failure_examples": [ "escape", "hide", "sleep", "fast" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Luke 9:22", "reasoning": "The first passion prediction in Luke requires 'suffer' \u2014 the dei (divine necessity) of messianic suffering. The foil 'reign' represents a glory-theology that bypasses the cross. Luke's travel narrative is structured around this suffering-Son-of-Man motif. A model ranking 'reign' above 'suffer' has learned triumphalist Christology but not the theologia crucis central to Luke's narrative arc.", "surface_confounder": "reign" }, { "id": "CON_088", "type": "contrastive_theology", "category": "hamartiology", "difficulty": "medium", "input": "The rich man who ignored Lazarus at his gate discovered after death that his [MASK] had sealed his eternal fate.", "targets": [ "sin" ], "foils": [ "wealth" ], "acceptable_alternatives": [ "unbelief", "indifference" ], "failure_examples": [ "success", "family", "health", "status" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Luke 16:19-31", "reasoning": "The rich man and Lazarus parable is often misread as condemning wealth itself. The foil 'wealth' captures this misreading \u2014 Luke's point is hamartiological: the sin of callous indifference, not the mere possession of money. A model ranking 'wealth' above 'sin' has absorbed the wealth-is-inherently-damning misreading rather than the parable's actual charge. 
'Sin' or 'indifference' shows the model grasps the moral-culpability frame.", "surface_confounder": "wealth" }, { "id": "CON_089", "type": "contrastive_theology", "category": "theology_proper", "difficulty": "hard", "input": "In the parable of the persistent widow, Jesus taught that God will grant [MASK] to his chosen ones who cry out to him day and night.", "targets": [ "justice" ], "foils": [ "vengeance" ], "acceptable_alternatives": [ "vindication" ], "failure_examples": [ "wealth", "power", "silence", "punishment" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Luke 18:7-8", "reasoning": "The persistent widow parable teaches about God's faithful justice toward his people. The foil 'vengeance' shifts the passage from divine vindication of the oppressed to retributive violence. Luke's theology of justice is restorative, not vengeful. A model ranking 'vengeance' above 'justice' has confused eschatological vindication with retaliatory anger, missing the Lukan emphasis on God as righteous advocate.", "surface_confounder": "vengeance" }, { "id": "CON_090", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "Jesus told the sinful woman, 'Your [MASK] has saved you; go in peace.'", "targets": [ "faith" ], "foils": [ "love" ], "acceptable_alternatives": [], "failure_examples": [ "beauty", "tears", "suffering", "money", "works" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Luke 7:50", "reasoning": "In the anointing scene, the woman's love is evidence of forgiveness already received, but Jesus attributes her salvation to faith, not love. The foil 'love' represents a works-based misreading \u2014 her loving acts were fruit, not root. 
A model ranking 'love' above 'faith' has confused the consequence of salvation with its instrument, inverting the Lukan ordo salutis.", "surface_confounder": "love" }, { "id": "CON_091", "type": "contrastive_theology", "category": "pneumatology", "difficulty": "hard", "input": "Jesus told the disciples to wait in Jerusalem until they were clothed with [MASK] from on high.", "targets": [ "power" ], "foils": [ "knowledge" ], "acceptable_alternatives": [ "strength" ], "failure_examples": [ "wealth", "armor", "glory", "robes" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Luke 24:49", "reasoning": "The pre-Pentecost command frames the Spirit's coming as empowerment, not information transfer. The foil 'knowledge' represents a gnostic misunderstanding \u2014 the disciples already knew the resurrection facts; they needed power. A model ranking 'knowledge' above 'power' reveals confusion between pneumatological empowerment and intellectual enlightenment, missing Luke's dynamic Spirit theology.", "surface_confounder": "knowledge" }, { "id": "CON_092", "type": "contrastive_theology", "category": "eschatology", "difficulty": "medium", "input": "Jesus declared that the kingdom of God does not come through [MASK] but is already present among you.", "targets": [ "observation" ], "foils": [ "force" ], "acceptable_alternatives": [], "failure_examples": [ "prayer", "faith", "love", "worship" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Luke 17:20", "reasoning": "Luke 17:20 uses parateresis \u2014 careful observation or calculation. The foil 'force' represents a political-zealot eschatology that expects the kingdom through military action. While both are contrasted with the kingdom's true nature, 'observation' is the canonical term. 
A model ranking 'force' above 'observation' has conflated Luke's anti-calculation warning with the separate anti-violence theme.", "surface_confounder": "force" }, { "id": "CON_093", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "At his trial, Jesus was asked if he was the Christ, and he affirmed his identity as the Son of [MASK].", "targets": [ "god" ], "foils": [ "man" ], "acceptable_alternatives": [], "failure_examples": [ "david", "abraham", "moses", "joseph" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Luke 22:70", "reasoning": "In Luke's trial narrative, the Sanhedrin asks 'Are you the Son of God?' and Jesus affirms. The foil 'man' is the other Son-of title Jesus uses frequently (Son of Man). A model ranking 'man' above 'god' here has confused the two distinct Christological titles. In this specific trial context, the divine-sonship claim is the one that triggers the blasphemy charge. The model must distinguish which title applies in which narrative moment.", "surface_confounder": "man" }, { "id": "CON_094", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "Jesus said that those who are well have no need of a physician, for he came to call not the [MASK] but sinners to repentance.", "targets": [ "righteous" ], "foils": [ "sick" ], "acceptable_alternatives": [], "failure_examples": [ "wealthy", "powerful", "wise", "chosen" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Luke 5:31-32", "reasoning": "Jesus' response to criticism about eating with sinners uses the physician metaphor. The foil 'sick' follows the physical metaphor but misses the shift to moral categories \u2014 Jesus pivots from 'well/sick' to 'righteous/sinners.' 
A model ranking 'sick' above 'righteous' has failed to track the metaphor-to-theology transition within the same saying, staying at surface level rather than grasping the soteriological point.", "surface_confounder": "sick" }, { "id": "CON_095", "type": "contrastive_theology", "category": "eschatology", "difficulty": "medium", "input": "He is not the God of the dead, but of the [MASK], for all live to him.", "targets": [ "living" ], "foils": [ "righteous" ], "acceptable_alternatives": [], "failure_examples": [ "chosen", "worthy", "faithful", "elect" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Luke 20:38", "reasoning": "Jesus' argument for the resurrection against the Sadducees hinges on 'God of the living.' The foil 'righteous' narrows the scope \u2014 it implies only the morally qualified have resurrection hope, whereas 'living' establishes the universal metaphysical point that covenant relationship implies continued existence. A model ranking 'righteous' above 'living' conflates resurrection theology with moral categorization.", "surface_confounder": "righteous" }, { "id": "DOC_113", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "Jesus asked his disciples, Who do you say that I am? Peter answered, You are the [MASK].", "targets": [ "christ" ], "foils": [], "acceptable_alternatives": [ "messiah" ], "failure_examples": [ "prophet", "teacher", "lord", "king", "rabbi" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Mark 8:29", "reasoning": "Peter's confession at Caesarea Philippi is the christological turning point of Mark's Gospel. The surrounding context of 'Who do you say that I am' should strongly activate 'Christ' or 'Messiah' over generic titles like 'prophet' or 'teacher'. 
A model predicting 'prophet' reveals it has not learned the confessional vocabulary that distinguishes messianic identity from prophetic office in Mark.", "surface_confounder": "prophet" }, { "id": "DOC_114", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "For even the Son of Man came not to be served but to serve, and to give his life as a [MASK] for many.", "targets": [ "ransom" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "gift", "lesson", "sign", "model", "blessing" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Mark 10:45", "reasoning": "Mark 10:45 is the key soteriological verse in Mark, establishing the substitutionary atonement framework. 'Ransom' is a precise term from redemption theology \u2014 it implies a price paid for liberation. A model predicting 'gift' or 'sacrifice' has learned the general soteriological domain but not the specific Markan ransom motif that grounds penal substitution.", "surface_confounder": "sacrifice" }, { "id": "DOC_115", "type": "doctrinal_association", "category": "christology", "difficulty": "hard", "input": "In Mark's Gospel, Jesus frequently commands silence about his identity, a pattern scholars call the messianic [MASK].", "targets": [ "secret" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "mission", "kingdom", "plan", "message", "promise" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "", "reasoning": "The 'messianic secret' is a foundational Markan theme identified by Wrede. The bigram 'messianic ___' should strongly collocate with 'secret' in any corpus that includes biblical scholarship. 
A model that predicts 'mission' or 'kingdom' shows it has learned messianic vocabulary broadly but not the specific Markan literary motif.", "surface_confounder": "mission" }, { "id": "DOC_116", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "Jesus said to the paralytic, Son, your [MASK] are forgiven.", "targets": [ "sins" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "debts", "wounds", "crimes", "legs", "troubles" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Mark 2:5", "reasoning": "The healing of the paralytic in Mark 2 is primarily a forgiveness pericope, not a healing story. The phrase 'are forgiven' in declarative divine voice should strongly activate 'sins' as the object. A model predicting 'debts' has learned petition language (Lord's Prayer) rather than the declarative absolution formula. This tests whether the model distinguishes between forgiveness-as-petition and forgiveness-as-pronouncement.", "surface_confounder": "debts" }, { "id": "DOC_117", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "easy", "input": "And a voice came from heaven, You are my beloved [MASK], with you I am well pleased.", "targets": [ "son" ], "foils": [], "acceptable_alternatives": [ "child" ], "failure_examples": [ "prophet", "servant", "angel", "friend", "king" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Mark 1:11", "reasoning": "The baptismal theophany in Mark 1:11 establishes Jesus's divine sonship at the Gospel's outset. 'Beloved Son' is a high-frequency phrase in gospel corpora. Failure to predict 'son' in top-10 would indicate the model has not internalized the most basic trinitarian vocabulary from the Gospels. 
'Servant' as a top prediction would conflate Markan sonship language with Isaianic servant language.", "surface_confounder": "servant" }, { "id": "DOC_118", "type": "doctrinal_association", "category": "eschatology", "difficulty": "medium", "input": "And then they will see the Son of Man coming in clouds with great power and [MASK].", "targets": [ "glory" ], "foils": [], "acceptable_alternatives": [ "majesty" ], "failure_examples": [ "anger", "force", "speed", "thunder", "fire" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Mark 13:26", "reasoning": "Mark 13:26 echoes Daniel 7:13-14 and is central to Mark's eschatological discourse. The pairing of 'power and glory' is a fixed formula in parousia passages. A model that predicts 'anger' or 'wrath' has confused the second coming with judgment-only eschatology, missing the Danielic enthronement-glory motif that Mark employs.", "surface_confounder": "wrath" }, { "id": "DOC_119", "type": "doctrinal_association", "category": "christology", "difficulty": "hard", "input": "The Son of Man has [MASK] on earth to forgive sins, Jesus declared to the scribes.", "targets": [ "authority" ], "foils": [], "acceptable_alternatives": [ "power" ], "failure_examples": [ "come", "chosen", "decided", "promised", "risen" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Mark 2:10", "reasoning": "Authority (exousia) is a defining christological category in Mark \u2014 Jesus teaches with authority (1:22), has authority to forgive (2:10), and has authority over demons (1:27). The specific collocation 'has ___ to forgive sins' should activate 'authority' over generic verbs like 'come' or 'chosen'. 
This tests whether the model has learned the Markan authority-christology framework.", "surface_confounder": "come" }, { "id": "DOC_120", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "medium", "input": "Jesus warned that whoever blasphemes against the Holy [MASK] never has forgiveness but is guilty of an eternal sin.", "targets": [ "spirit" ], "foils": [], "acceptable_alternatives": [ "ghost" ], "failure_examples": [ "name", "god", "word", "law", "temple" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Mark 3:29", "reasoning": "The blasphemy against the Holy Spirit in Mark 3:29 is a key pneumatological warning passage. 'Holy ___' in a theological context should strongly collocate with 'Spirit'. A model predicting 'name' or 'God' reveals it has not learned the trinitarian person-language that distinguishes 'Holy Spirit' as a title from generic holiness attributions.", "surface_confounder": "" }, { "id": "DOC_121", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "hard", "input": "For from within, out of the [MASK] of man, come evil thoughts and all manner of sin.", "targets": [ "heart" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "mind", "body", "mouth", "soul", "world" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Mark 7:21", "reasoning": "Mark 7:21 locates the origin of sin internally in the heart, not in external defilement. This is a key hamartiology text establishing total depravity of the inner person. 'Mind' as a top prediction would reflect Greek philosophical anthropology rather than the biblical Hebrew-rooted concept of the heart (kardia/leb) as the seat of moral volition. 
The model must distinguish 'heart' from 'mind' in sin-origin contexts.", "surface_confounder": "mind" }, { "id": "DOC_122", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "easy", "input": "Jesus said, Go into all the world and preach the [MASK] to all creation.", "targets": [ "gospel" ], "foils": [], "acceptable_alternatives": [ "word" ], "failure_examples": [ "law", "truth", "message", "commandments", "scripture" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Mark 16:15", "reasoning": "The Great Commission in Mark 16:15 uses 'gospel' (euangelion), which is Mark's signature term appearing from the very first verse. 'Preach the gospel' is among the highest-frequency collocations in evangelical corpora. Failure to predict 'gospel' in top-10 indicates the model has minimal exposure to commissioning language.", "surface_confounder": "" }, { "id": "CAN_074", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "The beginning of the gospel of Jesus Christ, the [MASK] of God.", "targets": [ "son" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "word", "servant", "lamb", "prophet", "messenger" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Mark 1:1", "reasoning": "Mark 1:1 is the Gospel's opening declaration and thesis statement. 'Son of God' in this superscription establishes the christological frame for the entire narrative. This is a high-frequency verse and 'son' should be strongly predicted. 
Failure indicates the model lacks basic canonical vocabulary for Mark.", "surface_confounder": "" }, { "id": "CAN_075", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "And Jesus said to them, Follow me, and I will make you become fishers of [MASK].", "targets": [ "men" ], "foils": [], "acceptable_alternatives": [ "people" ], "failure_examples": [ "fish", "souls", "nations", "truth", "god" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Mark 1:17", "reasoning": "The calling of the first disciples in Mark 1:17 uses the distinctive metaphor 'fishers of men'. The phrase 'fishers of ___' is a canonical collocation. A model predicting 'fish' has inverted the metaphor entirely, while 'souls' represents a later theological abstraction not present in the text. This tests precise canonical recall versus theological paraphrase.", "surface_confounder": "souls" }, { "id": "CAN_076", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "He taught them as one who had authority, and not as the [MASK].", "targets": [ "scribes" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "priests", "prophets", "pharisees", "teachers", "elders" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Mark 1:22", "reasoning": "Mark 1:22 contrasts Jesus's authoritative teaching with that of the scribes specifically. The model must recall that the canonical contrast group is 'scribes', not 'Pharisees' or 'priests'. 
Predicting 'Pharisees' indicates the model has blurred the distinction between the different Jewish leadership groups that Mark carefully differentiates.", "surface_confounder": "pharisees" }, { "id": "CAN_077", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "And the curtain of the [MASK] was torn in two, from top to bottom.", "targets": [ "temple" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "sky", "room", "gate", "wall", "tent" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Mark 15:38", "reasoning": "The tearing of the temple curtain at Jesus's death is a climactic moment in Mark's passion narrative signifying the end of the old covenant barrier. 'Curtain of the temple' is the canonical phrase. A model predicting 'sky' has confused this with apocalyptic imagery. This tests whether the model has learned the specific passion narrative vocabulary of Mark 15.", "surface_confounder": "" }, { "id": "CAN_078", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "The time is fulfilled, and the [MASK] of God is at hand; repent and believe in the gospel.", "targets": [ "kingdom" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "wrath", "word", "glory", "love", "son" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Mark 1:15", "reasoning": "Mark 1:15 is Jesus's programmatic opening declaration. 'Kingdom of God' is the most frequent theological phrase in the Synoptic Gospels and the phrase '___ of God is at hand' is a very strong canonical collocation. Failure to predict 'kingdom' in top-10 indicates near-total absence of gospel vocabulary.", "surface_confounder": "" }, { "id": "CAN_079", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "And he said, Abba, Father, all things are possible for you. 
Remove this [MASK] from me.", "targets": [ "cup" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "burden", "pain", "curse", "trial", "punishment" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Mark 14:36", "reasoning": "The Gethsemane prayer in Mark 14:36 uses 'cup' as the canonical metaphor for the suffering and wrath Jesus is about to bear. 'Remove this cup' is a fixed phrase in passion narrative recall. A model predicting 'burden' or 'pain' has learned the semantic domain of suffering but not the specific OT-rooted cup-of-wrath metaphor that Mark employs.", "surface_confounder": "burden" }, { "id": "CAN_080", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "And Jesus cried out with a loud voice, My God, my God, why have you [MASK] me?", "targets": [ "forsaken" ], "foils": [], "acceptable_alternatives": [ "abandoned" ], "failure_examples": [ "forgotten", "cursed", "punished", "tested", "rejected" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Mark 15:34", "reasoning": "The cry of dereliction from Psalm 22:1 quoted in Mark 15:34 uses 'forsaken' as the precise term. This is theologically loaded \u2014 it expresses covenantal abandonment, not mere forgetfulness. A model predicting 'forgotten' has lost the covenantal weight. 
'Forsaken' in top-3 demonstrates the model has learned both the Psalm 22 source text and its passion narrative deployment.", "surface_confounder": "forgotten" }, { "id": "CON_096", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "When the centurion saw how Jesus died, he said, Truly this man was the [MASK] of God.", "targets": [ "son" ], "foils": [ "prophet" ], "acceptable_alternatives": [], "failure_examples": [ "servant", "messenger", "teacher", "king", "lamb" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Mark 15:39", "reasoning": "The centurion's confession in Mark 15:39 is the christological climax of the entire Gospel \u2014 a Gentile recognizes Jesus as Son of God at the cross. The foil 'prophet' tests whether the model has learned that Mark reserves the highest christological title for this moment. A model ranking 'prophet' above 'son' has flattened Markan christology to a prophetic-only category, missing the divine sonship theme.", "surface_confounder": "prophet" }, { "id": "CON_097", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "The Son of Man came to give his life as a ransom, not to earn [MASK] by works of the law.", "targets": [ "salvation" ], "foils": [ "merit" ], "acceptable_alternatives": [ "righteousness" ], "failure_examples": [ "wealth", "fame", "power", "reward", "honor" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Mark 10:45", "reasoning": "This tests whether the model correctly associates the ransom motif with grace-based salvation rather than merit-based achievement. The foil 'merit' represents the works-righteousness error that the ransom saying directly contradicts. 
If 'merit' outranks 'salvation', the model has conflated the service language in Mark 10:45 with an earning framework.", "surface_confounder": "merit" }, { "id": "CON_098", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "Jesus did not come primarily as a political [MASK] but as a suffering servant who would die for his people.", "targets": [ "king" ], "foils": [ "messiah" ], "acceptable_alternatives": [ "ruler", "leader" ], "failure_examples": [ "priest", "warrior", "judge", "hero", "god" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Mark 10:33-34", "reasoning": "Mark's Gospel redefines messiahship through suffering rather than political triumph. The foil 'messiah' is subtle \u2014 the sentence negates the political qualifier, so the mask should be filled by a political title (king/ruler) that is being denied, not 'messiah' itself. A model ranking 'messiah' higher has failed to parse the negation structure and conflated the category being rejected with the office itself.", "surface_confounder": "messiah" }, { "id": "CON_099", "type": "contrastive_theology", "category": "christology", "difficulty": "medium", "input": "The demons recognized Jesus and cried out because they knew he was the [MASK] one of God.", "targets": [ "holy" ], "foils": [ "chosen" ], "acceptable_alternatives": [], "failure_examples": [ "good", "great", "true", "mighty", "wise" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Mark 1:24", "reasoning": "In Mark 1:24 the unclean spirit identifies Jesus as 'the Holy One of God' \u2014 a christological title emphasizing divine consecration and purity. The foil 'chosen' is plausible in a messianic context but represents election language rather than the ontological holiness attribution the demons use. 
A model that ranks 'chosen' higher has learned messianic vocabulary but not the specific demonic-recognition formula in Mark.", "surface_confounder": "chosen" }, { "id": "CON_100", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "Jesus said, It is not the healthy who need a doctor but the [MASK], for I came to call sinners.", "targets": [ "sick" ], "foils": [ "weak" ], "acceptable_alternatives": [ "ill" ], "failure_examples": [ "poor", "lost", "broken", "dead", "blind" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Mark 2:17", "reasoning": "Mark 2:17 uses a physician metaphor where 'sick' is the canonical word paired with 'doctor/physician'. The foil 'weak' is semantically adjacent but breaks the medical metaphor that structures the saying. A model ranking 'weak' above 'sick' has learned the general vulnerability domain but not the specific physician-patient frame that grounds Jesus's ministry to sinners.", "surface_confounder": "weak" }, { "id": "CON_101", "type": "contrastive_theology", "category": "eschatology", "difficulty": "hard", "input": "But concerning that day or that hour, no one knows, not even the angels in heaven, nor the [MASK], but only the Father.", "targets": [ "son" ], "foils": [ "spirit" ], "acceptable_alternatives": [], "failure_examples": [ "church", "prophets", "saints", "disciples", "elect" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Mark 13:32", "reasoning": "Mark 13:32 is a critical eschatological text where Jesus claims ignorance of the parousia timing, ascending from angels to the Son to the Father. The foil 'spirit' tests whether the model has learned the specific trinitarian ordering in this verse \u2014 the Holy Spirit is notably absent from the hierarchy here. 
A model predicting 'spirit' has imported a generic trinitarian formula rather than learning Mark's specific text.", "surface_confounder": "spirit" }, { "id": "CON_102", "type": "contrastive_theology", "category": "christology", "difficulty": "medium", "input": "The chief priests accused Jesus of claiming to be the king of the Jews, but his kingdom was not of this [MASK].", "targets": [ "world" ], "foils": [ "age" ], "acceptable_alternatives": [ "earth" ], "failure_examples": [ "life", "place", "land", "realm", "time" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Mark 15:2", "reasoning": "While the 'not of this world' phrasing is Johannine (John 18:36), the concept runs through Mark's passion narrative where Jesus accepts the 'King of the Jews' title while redefining it. The foil 'age' represents an eschatological two-age framework that, while biblically valid elsewhere, is not the correct term in this spatial-contrast formula. A model predicting 'age' over 'world' has confused spatial and temporal kingdom categories.", "surface_confounder": "age" }, { "id": "CON_103", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "Jesus told the high priest, You will see the Son of Man seated at the right hand of [MASK] and coming with the clouds of heaven.", "targets": [ "power" ], "foils": [ "god" ], "acceptable_alternatives": [], "failure_examples": [ "glory", "heaven", "majesty", "judgment", "grace" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Mark 14:62", "reasoning": "Mark 14:62 quotes Psalm 110:1 and Daniel 7:13, using the circumlocution 'Power' (dynamis) as a reverential substitute for the divine name, following Jewish practice. The foil 'god' is the referent but not the canonical word \u2014 Mark preserves the circumlocution. 
A model predicting 'god' over 'power' has the correct theology but wrong lexical form, revealing it has not learned the specific Jewish circumlocution pattern that Mark employs in the trial narrative.", "surface_confounder": "god" }, { "id": "DOC_123", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "The angel told Joseph that the child conceived in Mary was from the Holy Spirit, and he would save his people from their [MASK].", "targets": [ "sins" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "enemies", "troubles", "rulers", "debts", "pain" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Matthew 1:21", "reasoning": "Matthew 1:21 defines Jesus' mission as soteriological from the outset \u2014 saving his people from sins, not political oppression. A model predicting 'enemies' reveals confusion between messianic expectation and the actual Matthean soteriology. 'Sins' must rank above generic rescue language to show the model encodes the spiritual nature of Christ's saving work.", "surface_confounder": "enemies" }, { "id": "DOC_124", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "The virgin shall conceive and bear a son, and they shall call his name [MASK], which means God with us.", "targets": [ "emmanuel" ], "foils": [], "acceptable_alternatives": [ "immanuel" ], "failure_examples": [ "jesus", "messiah", "lord", "christ", "michael" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Matthew 1:23", "reasoning": "Matthew's fulfillment quotation from Isaiah 7:14 introduces Emmanuel as a Christological title. The phrase 'God with us' is the definitional gloss that should strongly cue this specific name. 
A model predicting 'jesus' has learned generic gospel associations but not the specific prophetic-fulfillment vocabulary Matthew employs.", "surface_confounder": "jesus" }, { "id": "DOC_125", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "John the Baptist came preaching in the wilderness, saying [MASK], for the kingdom of heaven is at hand.", "targets": [ "repent" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "rejoice", "pray", "listen", "come", "follow" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Matthew 3:2", "reasoning": "Repentance is the entry condition for kingdom participation in Matthew's theology. The imperative paired with 'kingdom of heaven is at hand' should uniquely activate 'repent' \u2014 the same call Jesus echoes in 4:17. A model predicting 'rejoice' or 'pray' has missed the urgent ethical demand that precedes kingdom entry in Matthean soteriology.", "surface_confounder": "believe" }, { "id": "DOC_126", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "easy", "input": "After Jesus was baptized, the heavens opened and the Spirit of God descended like a [MASK] and came to rest on him.", "targets": [ "dove" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "flame", "cloud", "wind", "eagle", "light" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Matthew 3:16", "reasoning": "The dove is the canonical image of the Spirit's descent at Jesus' baptism across all four Gospels. 'Flame' would indicate confusion with Pentecost imagery, and 'cloud' with theophany traditions. 
Correct prediction shows the model has encoded the specific baptismal theophany vocabulary rather than generic divine-appearance terms.", "surface_confounder": "flame" }, { "id": "DOC_127", "type": "doctrinal_association", "category": "christology", "difficulty": "hard", "input": "Jesus said: Man shall not live by bread alone, but by every [MASK] that comes from the mouth of God.", "targets": [ "word" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "gift", "blessing", "breath", "command", "law" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Matthew 4:4", "reasoning": "Jesus' quotation of Deuteronomy 8:3 during the temptation establishes Scripture's sufficiency. 'Word' is the precise canonical term. 'Command' or 'law' are semantically adjacent but narrow the verse's broader reference to everything God speaks. A top-3 prediction of 'word' shows the model has internalized this high-frequency dominical quotation.", "surface_confounder": "command" }, { "id": "DOC_128", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "Blessed are the poor in [MASK], for theirs is the kingdom of heaven.", "targets": [ "spirit" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "heart", "mind", "soul", "body", "wealth" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Matthew 5:3", "reasoning": "The first Beatitude's 'poor in spirit' is a distinctive Matthean phrase distinguishing spiritual humility from material poverty (cf. Luke's 'poor'). 'Heart' is the strongest confounder as a common biblical locus of spiritual condition.
Correct prediction demonstrates the model has encoded the specific Matthean Beatitude formula over generic piety language.", "surface_confounder": "heart" }, { "id": "DOC_129", "type": "doctrinal_association", "category": "eschatology", "difficulty": "medium", "input": "Blessed are the meek, for they shall [MASK] the earth.", "targets": [ "inherit" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "rule", "judge", "conquer", "leave", "fill" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Matthew 5:5", "reasoning": "The Beatitude echoes Psalm 37:11 and its eschatological land promise. 'Inherit' is the precise covenantal term indicating reception by grace, not achievement. 'Rule' or 'conquer' would reveal a triumphalist misreading. Correct prediction shows the model encodes the inheritance-by-promise semantic cluster central to kingdom eschatology.", "surface_confounder": "rule" }, { "id": "DOC_130", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "easy", "input": "Jesus taught his disciples to pray: Our [MASK], who is in heaven, hallowed be your name.", "targets": [ "father" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "lord", "king", "god", "master", "creator" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Matthew 6:9", "reasoning": "The Lord's Prayer opens with the filial address 'Father,' establishing the intimate relational theology that pervades Matthew. 'Lord' or 'king' would indicate the model defaults to sovereignty language over the paternal intimacy Jesus introduces. 
This is a high-frequency test \u2014 failure would indicate very poor canonical coverage.", "surface_confounder": "" }, { "id": "DOC_131", "type": "doctrinal_association", "category": "soteriology", "difficulty": "hard", "input": "Enter by the [MASK] gate, for the gate is wide and the way is easy that leads to destruction.", "targets": [ "narrow" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "golden", "open", "front", "main", "eastern" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Matthew 7:13", "reasoning": "The narrow-gate metaphor is central to Matthean discipleship theology, demanding exclusive commitment. The contrastive structure with 'wide' should strongly cue 'narrow.' A model predicting architectural terms like 'golden' or 'eastern' has failed to encode the moral-soteriological contrast Jesus draws. Top-3 accuracy here shows real discourse-level theological understanding.", "surface_confounder": "" }, { "id": "DOC_132", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "hard", "input": "Go therefore and make disciples of all nations, baptizing them in the name of the Father and of the Son and of the Holy [MASK].", "targets": [ "spirit" ], "foils": [], "acceptable_alternatives": [ "ghost" ], "failure_examples": [ "fire", "word", "light", "one", "lamb" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Matthew 28:19", "reasoning": "The Great Commission's trinitarian baptismal formula is the foundational ecclesiological text in Matthew. 'Spirit' completing the triad after 'Father' and 'Son' should be unambiguous. 'Ghost' is acceptable as KJV variant. 
Failure here indicates the model lacks even basic trinitarian formula encoding.", "surface_confounder": "" }, { "id": "DOC_133", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "Jesus told Peter: On this rock I will build my [MASK], and the gates of hell shall not prevail against it.", "targets": [ "church" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "kingdom", "temple", "house", "throne", "army" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Matthew 16:18", "reasoning": "Matthew 16:18 is one of only two Gospel passages (with Matthew 18:17) where Jesus uses 'church' (ekklesia). 'Kingdom' is the strongest confounder since Matthew's dominant theme is kingdom of heaven, but the rock-building metaphor specifically cues ecclesiology. Correct prediction shows the model distinguishes ecclesiological from basileia vocabulary in Matthean discourse.", "surface_confounder": "kingdom" }, { "id": "DOC_134", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "medium", "input": "Jesus said: If your right eye causes you to [MASK], tear it out and throw it away.", "targets": [ "sin" ], "foils": [], "acceptable_alternatives": [ "stumble" ], "failure_examples": [ "cry", "suffer", "bleed", "fear", "doubt" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Matthew 5:29", "reasoning": "Jesus' radical hyperbole about sin avoidance in the Sermon on the Mount uses 'sin' or 'stumble' (skandalizo) as the trigger for self-denial. 'Cry' or 'suffer' would reveal a model that reads the passage as about pain avoidance rather than moral purity.
The prediction tests whether the model connects bodily metaphor to hamartiological vocabulary.", "surface_confounder": "suffer" }, { "id": "DOC_135", "type": "doctrinal_association", "category": "eschatology", "difficulty": "hard", "input": "The Son of Man will send his angels, and they will gather out of his kingdom all causes of sin, and throw them into the fiery [MASK].", "targets": [ "furnace" ], "foils": [], "acceptable_alternatives": [ "pit" ], "failure_examples": [ "lake", "river", "darkness", "storm", "desert" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Matthew 13:41-42", "reasoning": "Matthew uses 'fiery furnace' as a distinctive judgment image in the parable of the weeds, distinct from Revelation's 'lake of fire.' A model predicting 'lake' has conflated Matthean with Johannine eschatological imagery. 'Furnace' in top-3 demonstrates book-specific canonical vocabulary retention.", "surface_confounder": "lake" }, { "id": "DOC_136", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "easy", "input": "Are not two sparrows sold for a penny? And not one of them will fall to the ground apart from your [MASK].", "targets": [ "father" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "will", "lord", "master", "god", "knowledge" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Matthew 10:29", "reasoning": "Jesus' providence teaching uses 'Father' to ground divine sovereignty in paternal care. The sparrow illustration is high-frequency in pastoral contexts. 
'Will' as a prediction would be syntactically plausible but theologically imprecise \u2014 Jesus specifies the personal agent (Father), not the abstract attribute.", "surface_confounder": "will" }, { "id": "CAN_081", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "Now the birth of Jesus Christ took place in this way: when his mother Mary had been betrothed to [MASK], before they came together she was found to be with child.", "targets": [ "joseph" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "david", "herod", "simon", "james", "john" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Matthew 1:18", "reasoning": "Joseph is the named figure in Matthew's birth narrative and the object of 'betrothed to.' This is a basic canonical recall test \u2014 the model should have very high confidence on 'joseph' given the nativity context. Failure indicates poor coverage of the Matthean infancy narrative.", "surface_confounder": "" }, { "id": "CAN_082", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "Now after Jesus was born in [MASK] of Judea in the days of Herod the king, behold, wise men from the east came to Jerusalem.", "targets": [ "bethlehem" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "nazareth", "jerusalem", "galilee", "capernaum", "egypt" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Matthew 2:1", "reasoning": "Bethlehem as the birthplace of Jesus is a foundational canonical fact reinforced by Micah 5:2 fulfillment. 'Nazareth' is the strongest confounder as Jesus' hometown, but the 'of Judea' qualifier and Herod context should disambiguate.
Basic canonical geography test.", "surface_confounder": "nazareth" }, { "id": "CAN_083", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "You are the [MASK] of the earth, but if salt has lost its taste, how shall its saltiness be restored?", "targets": [ "salt" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "light", "bread", "hope", "seed", "fruit" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Matthew 5:13", "reasoning": "The salt/light pair is a distinctive Matthean discipleship metaphor. The downstream clause 'lost its taste' and 'saltiness' provide strong lexical cues. 'Light' is the paired metaphor from the next verse and the primary confounder. Correct prediction shows the model attends to intra-sentence coherence, not just topical association.", "surface_confounder": "light" }, { "id": "CAN_084", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "And lead us not into [MASK], but deliver us from evil.", "targets": [ "temptation" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "darkness", "sin", "death", "danger", "bondage" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Matthew 6:13", "reasoning": "The final petition of the Lord's Prayer pairs 'lead us not into temptation' with 'deliver us from evil.' 'Sin' is semantically close but not the canonical word. 'Darkness' reflects Johannine vocabulary, not Matthean. 
Precise prediction of 'temptation' over near-synonyms demonstrates canonical text fidelity.", "surface_confounder": "sin" }, { "id": "CAN_085", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Again I tell you, it is easier for a [MASK] to go through the eye of a needle than for a rich person to enter the kingdom of God.", "targets": [ "camel" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "rope", "thread", "man", "horse", "donkey" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Matthew 19:24", "reasoning": "The camel-through-needle's-eye is one of Jesus' most memorable hyperboles. 'Rope' reflects a popular but incorrect textual emendation (kamelos vs. kamilos). Correct prediction of 'camel' shows the model has retained the actual canonical reading rather than the folk reinterpretation.", "surface_confounder": "rope" }, { "id": "CAN_086", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "And about the ninth hour Jesus cried out with a loud voice, saying, Eli, Eli, lema [MASK]?", "targets": [ "sabachthani" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "hosanna", "amen", "hallelujah", "maranatha", "abba" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Matthew 27:46", "reasoning": "The cry of dereliction preserves Aramaic in the Greek text. 'Sabachthani' is a rare transliterated term that tests deep canonical retention. BERT may tokenize this into subwords, making it a challenging recall test. 
However, the 'Eli Eli lema' trigram is sufficiently distinctive that a well-trained model should produce the completion.", "surface_confounder": "" }, { "id": "CAN_087", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "Then Jesus said to him: Put your [MASK] back into its place, for all who take the sword will perish by the sword.", "targets": [ "sword" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "hand", "weapon", "knife", "shield", "staff" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Matthew 26:52", "reasoning": "Jesus' command in Gethsemane to the disciple who drew his sword (named as Peter only in John 18:10) uses 'sword' twice \u2014 once as the object to sheath and once in the proverbial warning. The downstream 'take the sword will perish by the sword' provides an overwhelming lexical cue. This tests basic co-reference resolution within canonical text.", "surface_confounder": "" }, { "id": "CAN_088", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "His master said to him: Well done, good and [MASK] servant. You have been faithful over a little; I will set you over much.", "targets": [ "faithful" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "humble", "wise", "holy", "obedient", "worthy" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Matthew 25:21", "reasoning": "The parable of the talents uses the formulaic 'good and faithful servant' \u2014 a phrase with extremely high frequency in Christian discourse. The downstream 'faithful over a little' creates a near-tautological cue.
'Wise' would indicate conflation with the wise/foolish virgins parable in the same chapter.", "surface_confounder": "wise" }, { "id": "CON_104", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "Simon Peter replied: You are the Christ, the Son of the [MASK] God.", "targets": [ "living" ], "foils": [ "true" ], "acceptable_alternatives": [], "failure_examples": [ "great", "good", "holy", "almighty", "eternal" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Matthew 16:16", "reasoning": "Peter's confession uses 'living God' \u2014 a specific Old Testament epithet emphasizing God's active reality versus dead idols. 'True' is the Johannine modifier (John 17:3) and represents a real cross-Gospel confusion. A model ranking 'living' above 'true' has correctly encoded the Matthean Christological confession rather than harmonizing with Johannine theology.", "surface_confounder": "true" }, { "id": "CON_105", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "Jesus said: The Son of Man came not to be served but to [MASK], and to give his life as a ransom for many.", "targets": [ "serve" ], "foils": [ "reign" ], "acceptable_alternatives": [], "failure_examples": [ "judge", "conquer", "teach", "rule", "heal" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Matthew 20:28", "reasoning": "The 'not to be served but to serve' antithesis defines Jesus' mission as kenotic service. 'Reign' represents a theology-of-glory misreading that inverts the servant-king paradox. 
A model ranking 'serve' above 'reign' has internalized the Matthean paradox of messianic humility over triumphalist expectation.", "surface_confounder": "reign" }, { "id": "CON_106", "type": "contrastive_theology", "category": "ecclesiology", "difficulty": "medium", "input": "Where two or three are gathered in my name, there am I [MASK] them.", "targets": [ "among" ], "foils": [ "above" ], "acceptable_alternatives": [ "with" ], "failure_examples": [ "before", "beyond", "behind", "against", "over" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Matthew 18:20", "reasoning": "Jesus promises presence 'among' or 'with' gathered believers \u2014 an immanent, relational preposition. 'Above' would indicate a transcendence-only theology that misses the incarnational ecclesiology Matthew develops. The preposition choice reveals whether the model encodes Christ's presence as alongside the church or distant from it.", "surface_confounder": "above" }, { "id": "CON_107", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "Jesus said to them: All [MASK] in heaven and on earth has been given to me.", "targets": [ "authority" ], "foils": [ "power" ], "acceptable_alternatives": [], "failure_examples": [ "glory", "honor", "wisdom", "praise", "dominion" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Matthew 28:18", "reasoning": "The Great Commission grounds the mission mandate in 'authority' (exousia), not raw 'power' (dynamis). The distinction matters Christologically: exousia implies delegated rightful rule, while dynamis implies mere force. 
A model that ranks 'authority' above 'power' has learned the specific Matthean commissioning vocabulary and the exousia-dynamis semantic distinction.", "surface_confounder": "power" }, { "id": "CON_108", "type": "contrastive_theology", "category": "eschatology", "difficulty": "medium", "input": "Then the King will say to those on his right: Come, you who are [MASK] by my Father, inherit the kingdom prepared for you.", "targets": [ "blessed" ], "foils": [ "chosen" ], "acceptable_alternatives": [], "failure_examples": [ "loved", "saved", "worthy", "called", "found" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Matthew 25:34", "reasoning": "In the sheep-and-goats judgment, 'blessed by my Father' is the canonical phrase. 'Chosen' imports Pauline election language that shifts the theological register from Matthean blessing-inheritance to predestinarian categories. Correct ranking demonstrates the model preserves Matthew's own eschatological vocabulary rather than harmonizing with Pauline soteriology.", "surface_confounder": "chosen" }, { "id": "CON_109", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "Behold, the virgin shall conceive and bear a son, and they shall call his name Emmanuel, which means God [MASK] us.", "targets": [ "with" ], "foils": [ "for" ], "acceptable_alternatives": [], "failure_examples": [ "above", "over", "among", "beyond", "near" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Matthew 1:23", "reasoning": "'God with us' (Emmanuel) is a statement of divine presence and solidarity. 'God for us' shifts toward Pauline advocacy language (Romans 8:31). The preposition encodes a specific Christological claim about incarnational presence versus substitutionary representation. 
Ranking 'with' above 'for' shows the model has learned the Matthean Emmanuel theology of divine accompaniment.", "surface_confounder": "for" }, { "id": "CON_110", "type": "contrastive_theology", "category": "eschatology", "difficulty": "medium", "input": "But concerning that day and hour, no one knows, not even the angels of heaven, nor the [MASK], but the Father only.", "targets": [ "son" ], "foils": [ "spirit" ], "acceptable_alternatives": [], "failure_examples": [ "prophets", "saints", "apostles", "elect", "church" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Matthew 24:36", "reasoning": "Jesus' statement of eschatological ignorance names 'the Son' as not knowing the day \u2014 a christologically challenging admission. 'Spirit' as foil tests whether the model recognizes the trinitarian ordering in this verse: angels < Son < Father. A model predicting 'Spirit' has imposed a standard trinitarian sequence rather than learning the specific Matthean Olivet Discourse text.", "surface_confounder": "spirit" }, { "id": "CON_111", "type": "contrastive_theology", "category": "ecclesiology", "difficulty": "medium", "input": "And I tell you, you are Peter, and on this [MASK] I will build my church.", "targets": [ "rock" ], "foils": [ "faith" ], "acceptable_alternatives": [], "failure_examples": [ "ground", "foundation", "stone", "mountain", "truth" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Matthew 16:18", "reasoning": "'Rock' (petra) is the canonical word in Peter's commissioning, playing on the Petros/petra wordplay. 'Faith' represents the Protestant interpretive tradition that reads 'rock' as Peter's confession of faith rather than Peter himself. 
The model should predict the actual canonical word 'rock' over the theological interpretation 'faith' \u2014 this tests text fidelity versus doctrinal overlay.", "surface_confounder": "faith" }, { "id": "CON_112", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "From that time Jesus began to show his disciples that he must go to Jerusalem and [MASK] many things from the elders and chief priests.", "targets": [ "suffer" ], "foils": [ "receive" ], "acceptable_alternatives": [], "failure_examples": [ "teach", "learn", "demand", "judge", "conquer" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Matthew 16:21", "reasoning": "Jesus' first passion prediction uses 'suffer' \u2014 the via crucis that defines Matthean Christology after Peter's confession. 'Receive' inverts the power dynamic, suggesting Jesus goes to Jerusalem to be honored rather than rejected. A model ranking 'suffer' above 'receive' has encoded the passion-prediction pattern central to the second half of Matthew.", "surface_confounder": "receive" }, { "id": "CON_113", "type": "contrastive_theology", "category": "eschatology", "difficulty": "medium", "input": "Then they will deliver you up to tribulation and put you to [MASK], and you will be hated by all nations for my name's sake.", "targets": [ "death" ], "foils": [ "shame" ], "acceptable_alternatives": [], "failure_examples": [ "work", "trial", "sleep", "flight", "prison" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Matthew 24:9", "reasoning": "The Olivet Discourse predicts lethal persecution \u2014 'put to death' is the canonical phrasing. 'Shame' represents a softened reading that reduces persecution to social stigma. 
Ranking 'death' above 'shame' shows the model has encoded the full severity of the Matthean persecution oracle rather than domesticating it.", "surface_confounder": "shame" }, { "id": "CON_114", "type": "contrastive_theology", "category": "eschatology", "difficulty": "hard", "input": "Then the righteous will shine like the [MASK] in the kingdom of their Father.", "targets": [ "sun" ], "foils": [ "stars" ], "acceptable_alternatives": [], "failure_examples": [ "moon", "fire", "gold", "light", "angels" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Matthew 13:43", "reasoning": "In Jesus' explanation of the parable of the weeds, the righteous shine 'like the sun' \u2014 a singular, supreme radiance. 'Stars' is the Danielic image (Daniel 12:3) and represents a cross-reference confusion. Matthew deliberately intensifies from Daniel's stars to the sun. Correct ranking shows the model distinguishes Matthean eschatological imagery from its Old Testament source.", "surface_confounder": "stars" }, { "id": "CON_115", "type": "contrastive_theology", "category": "kingdom_theology", "difficulty": "medium", "input": "The kingdom of [MASK] is like a grain of mustard seed that a man took and sowed in his field.", "targets": [ "heaven" ], "foils": [ "god" ], "acceptable_alternatives": [], "failure_examples": [ "earth", "man", "light", "glory", "israel" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Matthew 13:31", "reasoning": "'Kingdom of heaven' is Matthew's distinctive circumlocution \u2014 used over 30 times where the other Synoptics say 'kingdom of God.' 'God' is the Markan/Lukan parallel and thus a real harmonization error. 
A model ranking 'heaven' above 'god' has learned the specific Matthean periphrasis, one of the most distinctive vocabulary markers of this Gospel.", "surface_confounder": "god" }, { "id": "DOC_137", "type": "doctrinal_association", "category": "soteriology", "difficulty": "easy", "input": "For by [MASK] you have been saved through faith.", "targets": [ "grace" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "water", "fire", "law", "power" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ephesians 2:8", "reasoning": "Grace-faith-salvation is the most fundamental soteriological triad in Pauline theology. Extremely high co-occurrence in training data.", "surface_confounder": "" }, { "id": "DOC_138", "type": "doctrinal_association", "category": "soteriology", "difficulty": "easy", "input": "The [MASK] of sins through his blood.", "targets": [ "forgiveness" ], "foils": [], "acceptable_alternatives": [ "remission", "redemption" ], "failure_examples": [ "creation", "naming", "counting" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ephesians 1:7", "reasoning": "Forgiveness/remission of sins is a core soteriological phrase. The blood-sins collocation should strongly predict forgiveness.", "surface_confounder": "" }, { "id": "DOC_139", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "Therefore, since we have been [MASK] by faith, we have peace with God.", "targets": [ "justified" ], "foils": [], "acceptable_alternatives": [ "saved" ], "failure_examples": [ "condemned", "punished", "abandoned", "tested" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 5:1", "reasoning": "Justification by faith is distinctly Pauline. 
The model should associate 'by faith' + 'peace with God' with the specific forensic term 'justified'; the more generic 'saved' is tolerated only as an acceptable alternative, not as the canonical wording.", "surface_confounder": "" }, { "id": "DOC_140", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "In him we have [MASK] through his blood.", "targets": [ "redemption" ], "foils": [], "acceptable_alternatives": [ "forgiveness", "salvation" ], "failure_examples": [ "power", "victory", "glory", "knowledge" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ephesians 1:7", "reasoning": "Redemption-through-blood is a specific soteriological formulation requiring the model to link sacrificial blood imagery to the commercial metaphor of redemption.", "surface_confounder": "" }, { "id": "DOC_141", "type": "doctrinal_association", "category": "soteriology", "difficulty": "hard", "input": "He made him who knew no sin to be sin for us, so that we might become the [MASK] of God in him.", "targets": [ "righteousness" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "children", "servants", "people", "glory" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "2 Corinthians 5:21", "reasoning": "The great exchange doctrine requires understanding imputed righteousness. 'Children of God' is a plausible surface-level completion but misses the specific doctrinal point about the imputation of righteousness.", "surface_confounder": "" }, { "id": "DOC_142", "type": "doctrinal_association", "category": "soteriology", "difficulty": "hard", "input": "For the wages of sin is death, but the free [MASK] of God is eternal life.", "targets": [ "gift" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "law", "wrath", "word", "power" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 6:23", "reasoning": "The wages/gift contrast is structurally critical. The model must understand the antithetical parallelism: wages (earned) vs. 
gift (unearned).", "surface_confounder": "" }, { "id": "DOC_143", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "He himself bore our [MASK] in his body on the tree.", "targets": [ "sins" ], "foils": [], "acceptable_alternatives": [ "iniquities", "transgressions" ], "failure_examples": [ "gifts", "prayers", "names" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Peter 2:24", "reasoning": "Substitutionary atonement language: bearing sins on the cross. The 'body on the tree' context should activate penal substitution semantics.", "surface_confounder": "" }, { "id": "DOC_144", "type": "doctrinal_association", "category": "soteriology", "difficulty": "hard", "input": "Whom God put forward as a [MASK] by his blood, to be received by faith.", "targets": [ "propitiation" ], "foils": [], "acceptable_alternatives": [ "sacrifice", "atonement", "offering" ], "failure_examples": [ "prophet", "king", "teacher", "leader" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Romans 3:25", "reasoning": "Propitiation is a specialized theological term for the satisfaction of divine wrath. This tests whether the model has learned technical soteriological vocabulary beyond common words.", "surface_confounder": "" }, { "id": "DOC_145", "type": "doctrinal_association", "category": "christology", "difficulty": "easy", "input": "And the [MASK] became flesh and dwelt among us.", "targets": [ "Word" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "law", "man", "king", "prophet" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "John 1:14", "reasoning": "John 1:14, the key text of Johannine Logos christology, is one of the most quoted verses in the corpus. 
Extremely high signal.", "surface_confounder": "" }, { "id": "DOC_146", "type": "doctrinal_association", "category": "christology", "difficulty": "easy", "input": "He is the [MASK] of the invisible God, the firstborn of all creation.", "targets": [ "image" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "servant", "friend", "angel", "prophet" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Colossians 1:15", "reasoning": "Christ as the image of God is a high-frequency christological formulation in the Colossians hymn.", "surface_confounder": "" }, { "id": "DOC_147", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "Who, being in the form of God, did not consider equality with God something to be [MASK].", "targets": [ "grasped" ], "foils": [], "acceptable_alternatives": [ "seized", "exploited", "claimed" ], "failure_examples": [ "worshiped", "denied", "feared", "given" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Philippians 2:6", "reasoning": "The kenosis passage requires understanding Christ's voluntary self-emptying. The model must know the specific verb used in this creedal formulation.", "surface_confounder": "" }, { "id": "DOC_148", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "He was raised on the third day according to the [MASK].", "targets": [ "Scriptures" ], "foils": [], "acceptable_alternatives": [ "scriptures" ], "failure_examples": [ "law", "prophets", "temple", "covenant" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Corinthians 15:4", "reasoning": "The kerygmatic formula links resurrection to scriptural fulfillment. 
This tests whether the model has learned the early creedal pattern.", "surface_confounder": "" }, { "id": "DOC_149", "type": "doctrinal_association", "category": "christology", "difficulty": "hard", "input": "For in him the whole [MASK] of deity dwells bodily.", "targets": [ "fullness" ], "foils": [], "acceptable_alternatives": [ "fulness" ], "failure_examples": [ "power", "glory", "spirit", "nature" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Colossians 2:9", "reasoning": "The pleroma (fullness) concept is technical Pauline christology. 'Glory' or 'power' are plausible surface completions but miss the specific doctrinal claim about the totality of divine nature in Christ.", "surface_confounder": "" }, { "id": "DOC_150", "type": "doctrinal_association", "category": "christology", "difficulty": "hard", "input": "He is the [MASK] and the Omega, the first and the last, the beginning and the end.", "targets": [ "Alpha" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "Lord", "God", "King", "Word" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Revelation 22:13", "reasoning": "Alpha-Omega is a fixed pair. 
The model needs to have learned Greek letter co-occurrence in this specific christological/theological title.", "surface_confounder": "" }, { "id": "DOC_151", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "easy", "input": "But the [MASK] of the Spirit is love, joy, peace, patience, kindness.", "targets": [ "fruit" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "law", "word", "power", "work" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Galatians 5:22", "reasoning": "Fruit of the Spirit is an extremely high-frequency phrase in theological training data.", "surface_confounder": "" }, { "id": "DOC_152", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "medium", "input": "And do not grieve the Holy [MASK] of God, by whom you were sealed.", "targets": [ "Spirit" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "name", "law", "word", "temple" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Ephesians 4:30", "reasoning": "Holy Spirit as the sealing agent. The combination of 'grieve' + 'Holy' + 'sealed' should strongly predict 'Spirit'.", "surface_confounder": "" }, { "id": "DOC_153", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "medium", "input": "The Spirit himself [MASK] with our spirit that we are children of God.", "targets": [ "testifies" ], "foils": [], "acceptable_alternatives": [ "witnesses", "bears" ], "failure_examples": [ "fights", "speaks", "prays", "lives" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Romans 8:16", "reasoning": "The Spirit's testimony/witness role is a specific pneumatological function. 
The model must distinguish witnessing from other Spirit activities.", "surface_confounder": "" }, { "id": "DOC_154", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "hard", "input": "Likewise the Spirit helps us in our weakness, for we do not know what to pray for as we ought, but the Spirit himself [MASK] for us.", "targets": [ "intercedes" ], "foils": [], "acceptable_alternatives": [ "prays" ], "failure_examples": [ "fights", "speaks", "judges", "waits" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Romans 8:26", "reasoning": "The Spirit's intercessory role is a specific, less frequent pneumatological function requiring contextual understanding of the prayer-weakness-Spirit relationship.", "surface_confounder": "" }, { "id": "DOC_155", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "hard", "input": "Now there are varieties of [MASK], but the same Spirit.", "targets": [ "gifts" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "sins", "laws", "gods", "books" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Corinthians 12:4", "reasoning": "Spiritual gifts discourse. 
The 'varieties...same Spirit' structure tests whether the model understands the unity-in-diversity pneumatological framework.", "surface_confounder": "" }, { "id": "DOC_156", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "easy", "input": "Christ is the [MASK] of the church, his body.", "targets": [ "head" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "friend", "servant", "prophet", "builder" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Ephesians 5:23", "reasoning": "Head-body ecclesiological metaphor is extremely frequent in Pauline corpus and commentary.", "surface_confounder": "" }, { "id": "DOC_157", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "medium", "input": "Built on the foundation of the apostles and [MASK], Christ Jesus himself being the cornerstone.", "targets": [ "prophets" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "kings", "priests", "elders", "angels" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ephesians 2:20", "reasoning": "Apostles-and-prophets as foundation is a specific ecclesiological formula. The model must know this pairing.", "surface_confounder": "" }, { "id": "DOC_158", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "medium", "input": "For we are members of his [MASK].", "targets": [ "body" ], "foils": [], "acceptable_alternatives": [ "household" ], "failure_examples": [ "law", "kingdom", "army", "nation" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ephesians 5:30", "reasoning": "Body-of-Christ ecclesiology. 
The model must connect 'members' with the organic body metaphor rather than political or military metaphors.", "surface_confounder": "" }, { "id": "DOC_159", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "hard", "input": "Husbands, love your wives, as Christ loved the [MASK] and gave himself up for her.", "targets": [ "church" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "world", "law", "people", "nation" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Ephesians 5:25", "reasoning": "Christ-church bridal metaphor in the context of household code. The model must connect self-giving love with the church as bride, not the world generally.", "surface_confounder": "" }, { "id": "DOC_160", "type": "doctrinal_association", "category": "eschatology", "difficulty": "easy", "input": "For the Lord himself will [MASK] from heaven with a cry of command.", "targets": [ "descend" ], "foils": [], "acceptable_alternatives": [ "come", "return" ], "failure_examples": [ "speak", "judge", "create", "send" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Thessalonians 4:16", "reasoning": "The parousia descent from heaven is a core eschatological image in Paul.", "surface_confounder": "" }, { "id": "DOC_161", "type": "doctrinal_association", "category": "eschatology", "difficulty": "medium", "input": "Then comes the end, when he delivers the [MASK] to God the Father.", "targets": [ "kingdom" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "law", "church", "world", "people" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Corinthians 15:24", "reasoning": "The eschatological transfer of the kingdom to the Father is a specific Pauline eschatological concept.", "surface_confounder": "" }, { "id": "DOC_162", "type": "doctrinal_association", "category": "eschatology", "difficulty": "medium", "input": "And I saw the dead, great and small, standing before the [MASK].", "targets": 
[ "throne" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "temple", "altar", "cross", "wall" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Revelation 20:12", "reasoning": "The great white throne judgment scene. The model must connect 'dead standing before' with the judgment throne imagery.", "surface_confounder": "" }, { "id": "DOC_163", "type": "doctrinal_association", "category": "eschatology", "difficulty": "hard", "input": "For the trumpet will sound, and the dead will be raised [MASK].", "targets": [ "imperishable" ], "foils": [], "acceptable_alternatives": [ "incorruptible" ], "failure_examples": [ "again", "quickly", "first", "holy" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "1 Corinthians 15:52", "reasoning": "The resurrection body as imperishable/incorruptible is specific Pauline eschatological anthropology. Generic completions like 'again' miss the qualitative transformation Paul describes.", "surface_confounder": "" }, { "id": "DOC_164", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "easy", "input": "Holy, holy, [MASK] is the Lord God Almighty.", "targets": [ "holy" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "great", "good", "mighty", "just" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Isaiah 6:3 / Revelation 4:8", "reasoning": "The trisagion is the most recognizable doxological formula. 
Extremely high frequency.", "surface_confounder": "" }, { "id": "DOC_165", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "easy", "input": "God is [MASK], and whoever abides in love abides in God.", "targets": [ "love" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "power", "law", "spirit", "truth" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "1 John 4:16", "reasoning": "'God is love' is one of the most frequently quoted divine attribute statements in the entire corpus.", "surface_confounder": "" }, { "id": "DOC_166", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "medium", "input": "The Lord is [MASK] and abounding in steadfast love.", "targets": [ "merciful" ], "foils": [], "acceptable_alternatives": [ "gracious", "compassionate" ], "failure_examples": [ "angry", "powerful", "distant", "silent" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Psalm 103:8", "reasoning": "The divine attribute formula from Exodus 34:6 echoed throughout the Psalms. Tests whether the model has learned the OT chesed/racham vocabulary cluster.", "surface_confounder": "" }, { "id": "DOC_167", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "medium", "input": "Great is our Lord, and abundant in [MASK].", "targets": [ "power" ], "foils": [], "acceptable_alternatives": [ "strength", "might" ], "failure_examples": [ "sin", "anger", "fear", "confusion" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Psalm 147:5", "reasoning": "Divine omnipotence in psalmic praise context. 
The model must associate 'great' + 'abundant' with divine power attributes.", "surface_confounder": "" }, { "id": "DOC_168", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "hard", "input": "Before the mountains were brought forth, or ever you had formed the earth and the world, from [MASK] to everlasting you are God.", "targets": [ "everlasting" ], "foils": [], "acceptable_alternatives": [ "eternity" ], "failure_examples": [ "heaven", "creation", "beginning", "above" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Psalm 90:2", "reasoning": "The 'everlasting to everlasting' formula for divine eternality. The model must understand the mirror-structure expressing God's timelessness.", "surface_confounder": "" }, { "id": "DOC_169", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "hard", "input": "The [MASK] of the Lord is the beginning of wisdom.", "targets": [ "fear" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "word", "law", "love", "power" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Proverbs 9:10", "reasoning": "Fear of the Lord as the foundation of wisdom is a core OT theological concept repeated across Proverbs, Psalms, and Job. Tests proper reverence semantics.", "surface_confounder": "" }, { "id": "DOC_170", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "easy", "input": "For all have [MASK] and fall short of the glory of God.", "targets": [ "sinned" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "believed", "hoped", "prayed", "lived" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Romans 3:23", "reasoning": "Universal sinfulness is the most frequently quoted hamartiological text. 
Extremely high frequency.", "surface_confounder": "" }, { "id": "DOC_171", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "medium", "input": "Therefore, just as sin came into the world through one [MASK], and death through sin.", "targets": [ "man" ], "foils": [], "acceptable_alternatives": [ "person" ], "failure_examples": [ "day", "law", "act", "word" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 5:12", "reasoning": "Adamic federal headship \u2014 sin entering through one man (Adam). The model must connect 'sin came into the world through one' with the Adamic narrative.", "surface_confounder": "" }, { "id": "DOC_172", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "hard", "input": "The heart is [MASK] above all things, and desperately sick.", "targets": [ "deceitful" ], "foils": [], "acceptable_alternatives": [ "wicked" ], "failure_examples": [ "strong", "pure", "faithful", "good" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Jeremiah 17:9", "reasoning": "Total depravity of the heart. The model must resist positive-valence completions (strong, pure, faithful) and predict a negative attribute, specifically 'deceitful'.", "surface_confounder": "" }, { "id": "DOC_173", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "hard", "input": "Behold, I was brought forth in [MASK], and in sin did my mother conceive me.", "targets": [ "iniquity" ], "foils": [], "acceptable_alternatives": [ "sin" ], "failure_examples": [ "joy", "love", "hope", "faith" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Psalm 51:5", "reasoning": "Original sin / inherited depravity. 
The Davidic confession requires understanding that 'brought forth in' precedes a negative moral state, not a positive one.", "surface_confounder": "" }, { "id": "DOC_174", "type": "doctrinal_association", "category": "bibliology", "difficulty": "easy", "input": "All Scripture is [MASK] by God and profitable for teaching.", "targets": [ "inspired" ], "foils": [], "acceptable_alternatives": [ "breathed" ], "failure_examples": [ "written", "spoken", "created", "given" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "2 Timothy 3:16", "reasoning": "Theopneustos \u2014 God-breathed/inspired Scripture is the foundational bibliology text. High frequency in theological commentary.", "surface_confounder": "" }, { "id": "DOC_175", "type": "doctrinal_association", "category": "bibliology", "difficulty": "medium", "input": "For no prophecy was ever produced by the will of man, but men spoke from God as they were carried along by the Holy [MASK].", "targets": [ "Spirit" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "law", "word", "fire", "angel" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "2 Peter 1:21", "reasoning": "Spirit-carried inspiration of Scripture. The combination of prophecy + carried along + Holy strongly predicts Spirit.", "surface_confounder": "" }, { "id": "DOC_176", "type": "doctrinal_association", "category": "bibliology", "difficulty": "hard", "input": "The word of God is living and active, sharper than any two-edged [MASK].", "targets": [ "sword" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "knife", "blade", "spear", "arrow" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Hebrews 4:12", "reasoning": "The sword-of-the-word metaphor. 
While 'blade' or 'knife' are semantically similar, the specific biblical metaphor uses 'sword' consistently.", "surface_confounder": "" }, { "id": "CAN_089", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "The gospel according to [MASK].", "targets": [ "Matthew", "Mark", "Luke", "John" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "Paul", "Moses", "Abraham", "Peter", "Isaiah", "David" ], "pass_condition": "all_top_k_in_target_set", "k": 5, "reference": "", "reasoning": "The model should know that only four gospel authors exist and not predict epistle authors or OT figures.", "surface_confounder": "" }, { "id": "CAN_090", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "The epistle of Paul to the [MASK].", "targets": [ "Romans", "Corinthians", "Galatians", "Ephesians", "Philippians", "Colossians", "Thessalonians" ], "foils": [], "acceptable_alternatives": [ "Hebrews" ], "failure_examples": [ "Psalms", "Genesis", "Revelation", "Judges", "Proverbs" ], "pass_condition": "all_top_k_in_target_set", "k": 5, "reference": "", "reasoning": "The model should restrict predictions to valid Pauline epistle recipients and not bleed into OT books or non-epistolary NT books.", "surface_confounder": "" }, { "id": "CAN_091", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "In the beginning [MASK] created the heavens and the earth.", "targets": [ "God" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "Moses", "Adam", "Abraham", "man" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Genesis 1:1", "reasoning": "The most recognizable verse in the Bible. 
Tests basic creation theology attribution.", "surface_confounder": "" }, { "id": "CAN_092", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "The book of the prophet [MASK], who prophesied during the reign of Uzziah, Jotham, Ahaz, and Hezekiah.", "targets": [ "Isaiah" ], "foils": [], "acceptable_alternatives": [ "Hosea", "Micah" ], "failure_examples": [ "Paul", "Matthew", "David", "Moses", "Daniel" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Isaiah 1:1", "reasoning": "Prophet-to-historical-period association. The four kings listed are specifically mentioned in Isaiah's superscription. Tests canonical structural knowledge.", "surface_confounder": "" }, { "id": "CAN_093", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "And [MASK] said to the LORD, Here am I, send me.", "targets": [ "Isaiah" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "Moses", "David", "Paul", "Peter", "Abraham" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Isaiah 6:8", "reasoning": "Character-to-event: Isaiah's commissioning scene. While 'Here am I' appears elsewhere (Abraham, Samuel), the 'send me' addition is distinctively Isaianic.", "surface_confounder": "" }, { "id": "CAN_094", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "The LORD is my [MASK], I shall not want.", "targets": [ "shepherd" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "lord", "king", "father", "judge" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Psalm 23:1", "reasoning": "Psalm 23 is the most recognized psalm. 
The shepherd metaphor for God is a core OT theological image.", "surface_confounder": "" }, { "id": "CAN_095", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "By faith [MASK] offered to God a more acceptable sacrifice than Cain.", "targets": [ "Abel" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "Abraham", "Moses", "Noah", "David" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Hebrews 11:4", "reasoning": "Character-to-event in Hebrews 11 faith hall of fame. The Cain-Abel contrast is the specific identifier.", "surface_confounder": "" }, { "id": "CAN_096", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "By faith [MASK] was commended as having pleased God, and by faith he was taken up so that he should not see death.", "targets": [ "Enoch" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "Abraham", "Moses", "Elijah", "David" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Hebrews 11:5", "reasoning": "Within Hebrews 11, Enoch is the one 'taken up' so that he should not see death. Elijah is also taken up (2 Kings 2:11), but the 'commended as having pleased God' phrasing is specific to Enoch in Hebrews 11:5; 'commended as righteous' belongs to Abel in 11:4.", "surface_confounder": "" }, { "id": "CAN_097", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "By faith [MASK], when he was old, blessed each of the sons of Joseph.", "targets": [ "Jacob" ], "foils": [], "acceptable_alternatives": [ "Israel" ], "failure_examples": [ "Abraham", "Moses", "Isaac", "David" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Hebrews 11:21", "reasoning": "Jacob blessing Joseph's sons is a specific Genesis narrative detail. 
Isaac is a strong confounder since he also blessed sons (Esau and Jacob).", "surface_confounder": "" }, { "id": "CAN_098", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "The revelation of Jesus Christ to his servant [MASK].", "targets": [ "John" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "Paul", "Peter", "Moses", "Isaiah" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Revelation 1:1", "reasoning": "Book-to-author: Revelation is attributed to John. Basic canonical knowledge.", "surface_confounder": "" }, { "id": "CAN_099", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Paul, a servant of Christ Jesus, called to be an apostle, to all in [MASK] who are loved by God.", "targets": [ "Rome" ], "foils": [], "acceptable_alternatives": [ "Romans" ], "failure_examples": [ "Corinth", "Ephesus", "Jerusalem", "Galatia" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 1:1,7", "reasoning": "Epistle-to-audience: The specific opening address of Romans. The model must associate this particular salutation formula with Rome.", "surface_confounder": "" }, { "id": "CAN_100", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "The proverbs of [MASK], son of David, king of Israel.", "targets": [ "Solomon" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "David", "Moses", "Abraham", "Isaiah" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Proverbs 1:1", "reasoning": "Book-to-author: Proverbs attributed to Solomon. 
'Son of David, king' is the specific identifier.", "surface_confounder": "" }, { "id": "CAN_101", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "The words of [MASK] the son of Hilkiah, one of the priests who were in Anathoth.", "targets": [ "Jeremiah" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "Isaiah", "Ezekiel", "Daniel", "Hosea" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Jeremiah 1:1", "reasoning": "Prophet-to-lineage: Jeremiah son of Hilkiah from Anathoth. Tests detailed canonical knowledge of prophetic book superscriptions.", "surface_confounder": "" }, { "id": "CAN_102", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "In the third year of the reign of Jehoiakim king of Judah, [MASK] came to Jerusalem and besieged it.", "targets": [ "Nebuchadnezzar" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "Pharaoh", "Caesar", "Herod", "Cyrus" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Daniel 1:1", "reasoning": "Historical-event knowledge: Nebuchadnezzar's siege of Jerusalem under Jehoiakim. Tests the model's grasp of biblical historical narrative.", "surface_confounder": "" }, { "id": "CAN_103", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "The LORD said to [MASK], Go from your country and your kindred to the land that I will show you.", "targets": [ "Abram" ], "foils": [], "acceptable_alternatives": [ "Abraham" ], "failure_examples": [ "Moses", "Joshua", "Jacob", "David" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Genesis 12:1", "reasoning": "The Abrahamic call narrative is foundational. 
'Go from your country' is the specific identifier.", "surface_confounder": "" }, { "id": "CAN_104", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "After the death of [MASK] the servant of the LORD, the LORD said to Joshua.", "targets": [ "Moses" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "Joshua", "Abraham", "David", "Samuel" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Joshua 1:1", "reasoning": "Book transition: Moses to Joshua succession. The model must know the narrative order of leadership.", "surface_confounder": "" }, { "id": "CAN_105", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "Now the word of the LORD came to [MASK] the son of Amittai.", "targets": [ "Jonah" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "Isaiah", "Jeremiah", "Elijah", "Hosea" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Jonah 1:1", "reasoning": "Prophet-to-patronymic: Jonah son of Amittai. Tests whether the model has learned prophetic genealogical identifiers.", "surface_confounder": "" }, { "id": "CAN_106", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Then [MASK] answered the LORD and said, I know that you can do all things.", "targets": [ "Job" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "Moses", "David", "Isaiah", "Abraham" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Job 42:1-2", "reasoning": "Character-to-event: Job's final response to God's theophany. 
The line 'I know that you can do all things' opens Job's final confession (Job 42:1-6), which precedes his restoration.", "surface_confounder": "" }, { "id": "CAN_107", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "In the year that king [MASK] died I saw the Lord sitting upon a throne, high and lifted up.", "targets": [ "Uzziah" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "David", "Solomon", "Hezekiah", "Josiah" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Isaiah 6:1", "reasoning": "Very specific historical reference. Isaiah's throne room vision is dated to Uzziah's death year. Tests whether the model has memorized this specific historical anchor.", "surface_confounder": "" }, { "id": "CAN_108", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "Jesus said to him, I am the way, and the [MASK], and the life.", "targets": [ "truth" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "law", "light", "word", "door" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "John 14:6", "reasoning": "Way-truth-life triad is one of the most quoted Jesus sayings. Tests basic familiarity with Johannine 'I am' statements.", "surface_confounder": "" }, { "id": "CAN_109", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "I am the good [MASK]. The good [MASK] lays down his life for the sheep.", "targets": [ "shepherd" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "king", "master", "lord", "father" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "John 10:11", "reasoning": "Johannine 'I am' statement. 
The 'lays down his life for the sheep' context uniquely identifies the shepherd metaphor.", "surface_confounder": "" }, { "id": "CAN_110", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Blessed are the [MASK] in spirit, for theirs is the kingdom of heaven.", "targets": [ "poor" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "rich", "strong", "wise", "holy" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Matthew 5:3", "reasoning": "Beatitudes opening. 'Poor in spirit' is counterintuitive \u2014 the model must resist positive-valence completions and predict the paradoxical 'poor'.", "surface_confounder": "" }, { "id": "CAN_111", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "Now there was a man of the Pharisees named [MASK], a ruler of the Jews.", "targets": [ "Nicodemus" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "Paul", "Gamaliel", "Joseph", "Simon" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "John 3:1", "reasoning": "Character identification: Nicodemus is the only Pharisee described as 'a ruler of the Jews' who visits Jesus by night. Tests detailed narrative knowledge.", "surface_confounder": "" }, { "id": "CAN_112", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "In the beginning was the [MASK], and the [MASK] was with God.", "targets": [ "Word" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "light", "spirit", "law", "world" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "John 1:1", "reasoning": "Johannine prologue. 
The repeated [MASK] structure should help the model identify the Logos concept.", "surface_confounder": "" }, { "id": "CAN_113", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "The book of the genealogy of [MASK] Christ, the son of David, the son of Abraham.", "targets": [ "Jesus" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "God", "the", "our", "Lord" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Matthew 1:1", "reasoning": "Opening of Matthew's gospel. The 'genealogy of [X] Christ, son of David, son of Abraham' uniquely identifies Jesus.", "surface_confounder": "" }, { "id": "CON_116", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "The [MASK] of Christ makes us right with God.", "targets": [ "righteousness" ], "foils": [ "sin" ], "acceptable_alternatives": [ "blood", "death", "sacrifice" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 5:17-19", "reasoning": "Tests imputed righteousness vs. co-occurrence confusion.", "surface_confounder": "Both 'sin' and 'righteousness' co-occur heavily with 'Christ' and 'God'. A naive model trained on passages about Christ's relationship to sin (bearing sin, conquering sin) might predict 'sin' over 'righteousness'." }, { "id": "CON_117", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "For the [MASK] of the law is Christ, for righteousness to everyone who believes.", "targets": [ "end" ], "foils": [ "beginning" ], "acceptable_alternatives": [ "fulfillment", "goal" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 10:4", "reasoning": "Christ as the telos (end/goal) of the law. 
'Beginning' is a plausible temporal term, but the theological point is that Christ terminates/fulfills the law.", "surface_confounder": "Both 'beginning' and 'end' are common in theological formulations. Christ is often called 'the beginning' (Colossians 1:18, Revelation 3:14), so surface co-occurrence could favor it." }, { "id": "CON_118", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "We are [MASK] by grace, not by works.", "targets": [ "saved" ], "foils": [ "condemned" ], "acceptable_alternatives": [ "justified", "redeemed" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ephesians 2:8-9", "reasoning": "Salvation by grace vs. condemnation. Grace is the mechanism of salvation, not condemnation.", "surface_confounder": "'Condemned' co-occurs with 'works' in passages about judgment and in Paul's 'not by works of the law' arguments. A model that has learned law-works-condemnation associations might get confused." }, { "id": "CON_119", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "Abraham believed God, and it was counted to him as [MASK].", "targets": [ "righteousness" ], "foils": [ "faith" ], "acceptable_alternatives": [], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 4:3 / Genesis 15:6", "reasoning": "Imputed righteousness, not imputed faith. Faith is the instrument, righteousness is what is credited.", "surface_confounder": "'Faith' and 'believed' are near-synonyms. A model might predict 'faith' because it co-occurs with 'Abraham' and 'believed' frequently. But the theological point is that belief results in credited righteousness, not credited faith." 
}, { "id": "CON_120", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "For by [MASK] you have been saved, not by works.", "targets": [ "grace" ], "foils": [ "law" ], "acceptable_alternatives": [ "faith" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ephesians 2:8-9", "reasoning": "Grace vs. law as the instrument of salvation. The law-works association is strong, but grace is what saves.", "surface_confounder": "'Law' and 'works' co-occur extremely frequently. A model might complete 'by law...not by works' as a coherent (but doctrinally wrong) phrase." }, { "id": "CON_121", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "There is therefore now no [MASK] for those who are in Christ Jesus.", "targets": [ "condemnation" ], "foils": [ "salvation" ], "acceptable_alternatives": [], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 8:1", "reasoning": "The 'no X for those in Christ' construction. 'Salvation' is a strong Christ-association but is doctrinally opposite here \u2014 the point is the absence of condemnation, not the absence of salvation.", "surface_confounder": "'Salvation' and 'Christ Jesus' co-occur more frequently than 'condemnation' and 'Christ Jesus' in many contexts. But the 'no' negation requires the negative concept." }, { "id": "CON_122", "type": "contrastive_theology", "category": "christology", "difficulty": "medium", "input": "Who, though he was in the form of God, [MASK] himself by taking the form of a servant.", "targets": [ "emptied" ], "foils": [ "exalted" ], "acceptable_alternatives": [ "humbled" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Philippians 2:6-7", "reasoning": "Kenosis \u2014 self-emptying, not self-exaltation. 
The Christ hymn moves downward before moving upward.", "surface_confounder": "'Exalted' appears in the very next section of the same hymn (Phil 2:9). Both words are strongly associated with Christ in this passage, but in opposite directional movements." }, { "id": "CON_123", "type": "contrastive_theology", "category": "christology", "difficulty": "medium", "input": "He was [MASK] for our transgressions.", "targets": [ "pierced" ], "foils": [ "praised" ], "acceptable_alternatives": [ "wounded", "crushed" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Isaiah 53:5", "reasoning": "Suffering servant christology. Christ is pierced/wounded, not praised, for transgressions.", "surface_confounder": "'Praised' is a frequent predicate of God/Christ. A model that has learned 'He was [positive verb]' patterns for Christ might prefer 'praised' over the suffering-servant language." }, { "id": "CON_124", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "Christ [MASK] once for sins, the righteous for the unrighteous.", "targets": [ "suffered" ], "foils": [ "reigned" ], "acceptable_alternatives": [ "died" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 Peter 3:18", "reasoning": "Substitutionary suffering \u2014 Christ suffered, not reigned, in the context of 'for sins'.", "surface_confounder": "'Christ reigned' is a valid theological statement elsewhere. The 'once for sins' and 'righteous for unrighteous' qualifiers specify the substitutionary context requiring suffering language." 
}, { "id": "CON_125", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "He made himself [MASK], being born in the likeness of men.", "targets": [ "nothing" ], "foils": [ "God" ], "acceptable_alternatives": [ "low", "humble" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 10, "reference": "Philippians 2:7", "reasoning": "Kenotic christology continued. 'Made himself God' is Johannine (John 10:33 accusation; cf. John 5:18), but in the Philippians hymn the movement is downward: made himself nothing/of no reputation.", "surface_confounder": "'Made himself God' is a frequent phrase in christological controversy passages. The model must distinguish the Philippians kenosis context from the Johannine deity-claim context." }, { "id": "CON_126", "type": "contrastive_theology", "category": "christology", "difficulty": "medium", "input": "For our sake he made him to be [MASK], so that in him we might become the righteousness of God.", "targets": [ "sin" ], "foils": [ "righteous" ], "acceptable_alternatives": [], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Corinthians 5:21", "reasoning": "The great exchange: Christ became sin (not righteous) so we could become righteous. The 'so that...righteousness' clause means the first slot must be the opposite.", "surface_confounder": "'Made him to be righteous' sounds positive and Christ-affirming. But the entire point is the paradox: Christ was made sin. The model must understand the antithetical structure." }, { "id": "CON_127", "type": "contrastive_theology", "category": "pneumatology", "difficulty": "medium", "input": "The Spirit gives [MASK], but the letter kills.", "targets": [ "life" ], "foils": [ "law" ], "acceptable_alternatives": [], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Corinthians 3:6", "reasoning": "Spirit-letter antithesis. 
The Spirit gives life (not law), contrasted with the letter that kills.", "surface_confounder": "'Spirit' and 'law' co-occur very frequently in Romans and Galatians. But the specific contrast here is Spirit=life vs. letter=death." }, { "id": "CON_128", "type": "contrastive_theology", "category": "pneumatology", "difficulty": "hard", "input": "For God gave us a spirit not of [MASK] but of power and love and self-control.", "targets": [ "fear" ], "foils": [ "strength" ], "acceptable_alternatives": [ "timidity", "cowardice" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Timothy 1:7", "reasoning": "The 'not of X but of Y' construction. The Spirit is characterized by power, love, and self-control \u2014 contrasted with fear/timidity.", "surface_confounder": "'Spirit of strength' sounds theologically plausible, but the 'not of' negation requires something that is being rejected. 'Strength' is too similar to 'power' in the positive list." }, { "id": "CON_129", "type": "contrastive_theology", "category": "pneumatology", "difficulty": "medium", "input": "Walk by the [MASK], and you will not gratify the desires of the flesh.", "targets": [ "Spirit" ], "foils": [ "law" ], "acceptable_alternatives": [], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Galatians 5:16", "reasoning": "Spirit-flesh antithesis in Galatians. Walking by the Spirit (not the law) overcomes fleshly desires.", "surface_confounder": "The law is also presented as a means to address sin/flesh in many OT passages. The Pauline innovation is that the Spirit (not the law) is the means of overcoming the flesh." 
}, { "id": "CON_130", "type": "contrastive_theology", "category": "theology_proper", "difficulty": "medium", "input": "The [MASK] of God toward man is everlasting.", "targets": [ "love" ], "foils": [ "worship" ], "acceptable_alternatives": [ "mercy", "grace", "faithfulness", "kindness" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Jeremiah 31:3", "reasoning": "Directional relationship: God\u2192man is love/mercy/grace, not worship. Worship flows man\u2192God.", "surface_confounder": "'Worship of God' is an extremely frequent phrase. But 'of God toward man' specifies the direction, and God does not worship man." }, { "id": "CON_131", "type": "contrastive_theology", "category": "theology_proper", "difficulty": "medium", "input": "The [MASK] of man toward God should be wholehearted.", "targets": [ "worship" ], "foils": [ "grace" ], "acceptable_alternatives": [ "devotion", "love", "obedience", "fear" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Deuteronomy 6:5", "reasoning": "Directional relationship: man\u2192God is worship/devotion/obedience, not grace. Grace flows God\u2192man.", "surface_confounder": "'Grace of God' is extremely common, but 'of man toward God' reverses the direction. Grace is a divine attribute, not a human offering." }, { "id": "CON_132", "type": "contrastive_theology", "category": "theology_proper", "difficulty": "hard", "input": "God is [MASK] in all his ways and faithful in all he does.", "targets": [ "righteous" ], "foils": [ "wrathful" ], "acceptable_alternatives": [ "just", "holy", "gracious" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Psalm 145:17", "reasoning": "Divine attribute in praise context. 'Righteous in all his ways' is a general attribute of God's character.", "surface_confounder": "'Wrathful' is a real divine attribute that co-occurs with God frequently. 
But in a praise psalm describing all God's ways comprehensively, 'righteous' is the holistic positive attribute." }, { "id": "CON_133", "type": "contrastive_theology", "category": "theology_proper", "difficulty": "hard", "input": "For the Lord your God is a [MASK] God.", "targets": [ "jealous" ], "foils": [ "loving" ], "acceptable_alternatives": [ "merciful", "gracious" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 10, "reference": "Deuteronomy 4:24", "reasoning": "Divine jealousy \u2014 a counterintuitive attribute. The specific OT formula 'the Lord your God is a [X] God' frequently completes with 'jealous' in covenant contexts.", "surface_confounder": "'Loving God' is far more common in general theological discourse. But the specific OT formula with the covenantal 'your God' often yields 'jealous' as a covenant-fidelity term." }, { "id": "CON_134", "type": "contrastive_theology", "category": "theology_proper", "difficulty": "medium", "input": "The Lord is slow to [MASK] and abounding in steadfast love.", "targets": [ "anger" ], "foils": [ "forgive" ], "acceptable_alternatives": [ "wrath" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Exodus 34:6 / Numbers 14:18", "reasoning": "The 'slow to anger' formula from the divine self-revelation at Sinai. 'Slow to forgive' inverts the theology.", "surface_confounder": "'Slow to forgive' sounds plausible and 'forgive' has strong God-associations. But the Exodus 34:6 formula specifically pairs 'slow to anger' with 'abounding in steadfast love' \u2014 God is quick to forgive, slow to anger." 
}, { "id": "CON_135", "type": "contrastive_theology", "category": "ecclesiology", "difficulty": "medium", "input": "The church is the [MASK] of Christ.", "targets": [ "body" ], "foils": [ "enemy" ], "acceptable_alternatives": [ "bride", "temple" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ephesians 1:22-23", "reasoning": "Church-Christ organic relationship. The church is the body (or bride/temple), not the enemy, of Christ.", "surface_confounder": "'Enemy of Christ' appears in passages about opposition to Christ. Both 'body' and 'enemy' have 'of Christ' constructions, but the ecclesiological context demands the positive organic metaphor." }, { "id": "CON_136", "type": "contrastive_theology", "category": "ecclesiology", "difficulty": "hard", "input": "For no one ever hated his own [MASK], but nourishes and cherishes it, just as Christ does the church.", "targets": [ "flesh" ], "foils": [ "soul" ], "acceptable_alternatives": [ "body" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ephesians 5:29", "reasoning": "The marriage-church analogy. Paul uses 'flesh' (not 'soul') for the nourishing/cherishing metaphor linking marriage to Christ-church union.", "surface_confounder": "'Soul' is a plausible spiritual-sounding completion. But Paul's argument is about the physical/embodied nature of care: nourishing and cherishing are flesh-care verbs in this context." }, { "id": "CON_137", "type": "contrastive_theology", "category": "ecclesiology", "difficulty": "medium", "input": "There is one [MASK] and one Spirit, just as you were called to the one hope.", "targets": [ "body" ], "foils": [ "God" ], "acceptable_alternatives": [], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Ephesians 4:4", "reasoning": "The Ephesians 4 unity formula lists: one body, one Spirit, one hope, one Lord, one faith, one baptism, one God. 
The pairing with 'one Spirit' in this specific clause identifies 'body'.", "surface_confounder": "'One God' is a valid completion and occurs later in the same passage. But the ordering of the formula pairs body-Spirit, not God-Spirit, in this clause." }, { "id": "CON_138", "type": "contrastive_theology", "category": "eschatology", "difficulty": "medium", "input": "The dead in Christ will [MASK] first.", "targets": [ "rise" ], "foils": [ "perish" ], "acceptable_alternatives": [ "be raised" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 Thessalonians 4:16", "reasoning": "Resurrection of the dead in Christ \u2014 they rise, not perish.", "surface_confounder": "'Dead' + 'perish' is a natural collocation. But 'dead in Christ' specifies resurrection hope, not final destruction." }, { "id": "CON_139", "type": "contrastive_theology", "category": "eschatology", "difficulty": "hard", "input": "It is sown in [MASK]; it is raised in glory.", "targets": [ "dishonor" ], "foils": [ "honor" ], "acceptable_alternatives": [ "weakness", "corruption" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 10, "reference": "1 Corinthians 15:43", "reasoning": "Resurrection body contrasts: sown in dishonor, raised in glory. The 'sown' slot takes the negative term.", "surface_confounder": "'Sown in honor' sounds positive and plausible. But the antithetical parallelism (sown in X / raised in Y) requires X to be the negative counterpart of Y." }, { "id": "CON_140", "type": "contrastive_theology", "category": "eschatology", "difficulty": "medium", "input": "For we must all appear before the [MASK] seat of Christ.", "targets": [ "judgment" ], "foils": [ "mercy" ], "acceptable_alternatives": [], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Corinthians 5:10", "reasoning": "The bema (judgment seat) of Christ. 
'Mercy seat' is a real biblical concept but refers to the ark of the covenant, not the eschatological tribunal.", "surface_confounder": "'Mercy seat' is a frequent biblical phrase (Hebrew: kapporet). But in eschatological context ('must all appear before'), it is the judgment seat (bema), not the mercy seat." }, { "id": "CON_141", "type": "contrastive_theology", "category": "eschatology", "difficulty": "hard", "input": "Then I saw a new [MASK] and a new earth, for the first heaven and the first earth had passed away.", "targets": [ "heaven" ], "foils": [ "temple" ], "acceptable_alternatives": [], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Revelation 21:1", "reasoning": "New heaven and new earth \u2014 the eschatological renewal. 'Temple' is explicitly absent in the new Jerusalem (Rev 21:22).", "surface_confounder": "'New temple' is plausible in eschatological contexts (Ezekiel's temple vision). But Revelation 21-22 explicitly features new heaven/earth and states there is NO temple." }, { "id": "CON_142", "type": "contrastive_theology", "category": "hamartiology", "difficulty": "medium", "input": "For the [MASK] of sin is death.", "targets": [ "wages" ], "foils": [ "gift" ], "acceptable_alternatives": [], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 6:23", "reasoning": "Wages of sin (earned punishment) vs. gift of God (unearned grace). Sin pays wages; it doesn't give gifts.", "surface_confounder": "'Gift' appears in the immediately following clause ('the free gift of God is eternal life'). A model that has learned the verse as a chunk might confuse which slot gets 'wages' vs. 'gift'." 
}, { "id": "CON_143", "type": "contrastive_theology", "category": "hamartiology", "difficulty": "hard", "input": "Through one man [MASK] entered the world, and death through sin.", "targets": [ "sin" ], "foils": [ "life" ], "acceptable_alternatives": [], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 5:12", "reasoning": "The fall narrative: sin (not life) entered through Adam. Life enters through Christ (the second Adam).", "surface_confounder": "'Through one man life entered' is the Christological counterpart in the same chapter. The model must distinguish Adam (sin entry) from Christ (life entry)." }, { "id": "CON_144", "type": "contrastive_theology", "category": "hamartiology", "difficulty": "medium", "input": "The mind set on the flesh is [MASK], but the mind set on the Spirit is life and peace.", "targets": [ "death" ], "foils": [ "life" ], "acceptable_alternatives": [], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 8:6", "reasoning": "Flesh-death vs. Spirit-life antithesis. The first clause takes the negative outcome.", "surface_confounder": "'Life' appears in the contrasting second clause. A model might predict 'life' due to high frequency in the immediate context, but the flesh-slot requires 'death'." }, { "id": "CON_145", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "[MASK] justifies the ungodly.", "targets": [ "God" ], "foils": [ "law" ], "acceptable_alternatives": [ "Christ", "faith" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 4:5", "reasoning": "God (not the law) justifies. The law condemns; God justifies through grace.", "surface_confounder": "'Law justifies' is a phrase pattern that appears in Romans \u2014 specifically in passages where Paul DENIES that the law justifies. A naive model might pick up the co-occurrence without the negation." 
}, { "id": "CON_146", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "The law brings [MASK], but grace brings salvation.", "targets": [ "wrath" ], "foils": [ "salvation" ], "acceptable_alternatives": [ "condemnation", "death", "knowledge" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 4:15", "reasoning": "Law-wrath vs. grace-salvation antithesis. The law slot takes the negative outcome.", "surface_confounder": "A model might default to positive outcomes for 'brings' constructions. But the Pauline law-grace antithesis specifically pairs law with wrath/condemnation." }, { "id": "CON_147", "type": "contrastive_theology", "category": "theology_proper", "difficulty": "hard", "input": "The [MASK] created man in his own image.", "targets": [ "LORD" ], "foils": [ "man" ], "acceptable_alternatives": [ "God", "Lord" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Genesis 1:27", "reasoning": "Creator-creature distinction. God created man, not the reverse.", "surface_confounder": "'Man created' is a frequent phrase in general language. But in the 'in his own image' creation formula, the subject must be the divine creator." }, { "id": "CON_148", "type": "contrastive_theology", "category": "theology_proper", "difficulty": "medium", "input": "The [MASK] searches the hearts and minds.", "targets": [ "LORD" ], "foils": [ "prophet" ], "acceptable_alternatives": [ "God", "Spirit" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Psalm 139:1-2 / Jeremiah 17:10", "reasoning": "Omniscience is a divine attribute. Only God (not prophets) searches hearts comprehensively.", "surface_confounder": "'Prophet searches' could seem plausible \u2014 prophets do discern things. But the comprehensive searching of hearts is a uniquely divine prerogative in biblical theology." 
}, { "id": "CON_149", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "The [MASK] sends the Spirit of truth.", "targets": [ "Father" ], "foils": [ "Spirit" ], "acceptable_alternatives": [ "Son", "Christ" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "John 15:26", "reasoning": "Trinitarian procession: The Father (or Son) sends the Spirit. The Spirit does not send himself in the Johannine farewell discourse.", "surface_confounder": "'Spirit sends the Spirit' has surface coherence, and 'Spirit of truth' might prime the model to predict 'Spirit' as the subject. But Trinitarian theology has the Father/Son as the sender." }, { "id": "CON_150", "type": "contrastive_theology", "category": "christology", "difficulty": "medium", "input": "No one comes to the [MASK] except through me.", "targets": [ "Father" ], "foils": [ "temple" ], "acceptable_alternatives": [], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "John 14:6", "reasoning": "Christ as the exclusive mediator to the Father. 'Temple' is a plausible access-point but misses the relational theology.", "surface_confounder": "'Comes to the temple' is a natural biblical phrase (pilgrimage, worship). But Jesus is speaking about relational access to the Father, not physical access to a building." }, { "id": "CON_151", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "For there is one God, and there is one [MASK] between God and men, the man Christ Jesus.", "targets": [ "mediator" ], "foils": [ "barrier" ], "acceptable_alternatives": [], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 Timothy 2:5", "reasoning": "Christ as mediator (bridge), not barrier, between God and man.", "surface_confounder": "'Barrier between God and men' sounds theologically plausible (sin as barrier is a common theme). 
But Christ is the mediator who removes the barrier, not a barrier himself." }, { "id": "CON_152", "type": "contrastive_theology", "category": "eschatology", "difficulty": "medium", "input": "He will come again to [MASK] the living and the dead.", "targets": [ "judge" ], "foils": [ "save" ], "acceptable_alternatives": [], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Timothy 4:1 / Apostles' Creed", "reasoning": "Eschatological judgment, not salvation, at the second coming in creedal formulations.", "surface_confounder": "'Save the living and the dead' sounds soteriologically positive. But the creedal/confessional formula specifically uses 'judge' for the parousia function vis-a-vis 'the living and the dead'." }, { "id": "CON_153", "type": "contrastive_theology", "category": "eschatology", "difficulty": "hard", "input": "For the creation [MASK] with eager longing for the revealing of the sons of God.", "targets": [ "waits" ], "foils": [ "groans" ], "acceptable_alternatives": [], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 8:19", "reasoning": "Creation waits (v.19) then groans (v.22). Both verbs apply to creation in Romans 8, but in this specific verse the verb is 'waits' with eager longing.", "surface_confounder": "'Creation groans' appears just three verses later (Rom 8:22) and is also very well-known. Both are theologically correct for creation, but only 'waits' fits v.19's 'eager longing for the revealing'." }, { "id": "CON_154", "type": "contrastive_theology", "category": "hamartiology", "difficulty": "hard", "input": "I do not do the [MASK] I want to do, but the evil I do not want to do, this I keep on doing.", "targets": [ "good" ], "foils": [ "evil" ], "acceptable_alternatives": [], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 7:19", "reasoning": "Paul's inner conflict. 
The first slot is 'good' (desired but unperformed), not 'evil' (which fills the second clause).", "surface_confounder": "'Evil' appears prominently in the very same sentence. A model might predict 'evil' for the first [MASK] position because of its high salience in the immediate context. But the antithetical structure requires good-then-evil ordering." }, { "id": "CON_155", "type": "contrastive_theology", "category": "theology_proper", "difficulty": "medium", "input": "The Lord does not [MASK] as man sees, for man looks on the outward appearance, but the Lord looks on the heart.", "targets": [ "see" ], "foils": [ "judge" ], "acceptable_alternatives": [ "look" ], "failure_examples": [], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 Samuel 16:7", "reasoning": "Divine vs. human perception. God does not 'see' (perceive) as man sees \u2014 the contrast is about mode of perception, not judgment.", "surface_confounder": "'The Lord does not judge as man judges' sounds doctrinally correct. But the specific 1 Samuel passage is about seeing/perceiving (choosing David based on the heart, not appearance), not about judicial judgment." }, { "id": "DOC_177", "type": "doctrinal_association", "category": "romans_road", "difficulty": "easy", "input": "for all have [MASK] and fall short of the glory of god", "targets": [ "sinned" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "lived", "come", "gone" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 3:23", "reasoning": "One of the most memorized verses in evangelicalism. 
'All have sinned' is the foundational statement of universal human sinfulness.", "surface_confounder": "" }, { "id": "DOC_178", "type": "doctrinal_association", "category": "romans_road", "difficulty": "easy", "input": "for the wages of sin is death but the free gift of god is eternal life in christ [MASK] our lord.", "targets": [ "jesus" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "god", "the", "a" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 6:23", "reasoning": "Wages/gift contrast is central to Pauline soteriology. 'Christ Jesus our Lord' is a fixed Pauline formula.", "surface_confounder": "" }, { "id": "DOC_179", "type": "doctrinal_association", "category": "romans_road", "difficulty": "easy", "input": "but god shows his love for us in that while we were still sinners christ [MASK] for us.", "targets": [ "died" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "lived", "came", "was" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 5:8", "reasoning": "The substitutionary death of Christ is the core of the gospel message. 'Christ died for us' is among the most repeated phrases in Christian text.", "surface_confounder": "" }, { "id": "DOC_180", "type": "doctrinal_association", "category": "romans_road", "difficulty": "easy", "input": "because if you confess with your mouth that jesus is [MASK] and believe in your heart that god raised him from the dead you will be saved.", "targets": [ "lord" ], "foils": [], "acceptable_alternatives": [ "christ" ], "failure_examples": [ "good", "the", "a" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 10:9", "reasoning": "The confession 'Jesus is Lord' is the earliest Christian creed. 
This verse pairs confession with belief for salvation.", "surface_confounder": "" }, { "id": "DOC_181", "type": "doctrinal_association", "category": "romans_road", "difficulty": "easy", "input": "therefore there is now no condemnation for those who are in [MASK] jesus", "targets": [ "christ" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "the", "a", "with" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 8:1", "reasoning": "'In Christ Jesus' is the most common Pauline positional phrase. No condemnation is the result of union with Christ.", "surface_confounder": "" }, { "id": "DOC_182", "type": "doctrinal_association", "category": "romans_road", "difficulty": "easy", "input": "for everyone who calls on the name of the [MASK] will be saved.", "targets": [ "lord" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "the", "a", "man" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 10:13", "reasoning": "Universality of the gospel offer. 'The name of the Lord' is a deeply rooted Old Testament phrase (Joel 2:32) applied christologically by Paul.", "surface_confounder": "" }, { "id": "DOC_183", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "He humbled himself by becoming [MASK] to the point of death, even death on a cross.", "targets": [ "obedient" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "resistant", "subject", "accustomed", "indifferent", "closer" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Philippians 2:8", "reasoning": "The kenosis hymn specifically links Christ's humility with volitional obedience unto death. 'Obedient' is the precise term that captures the active, willing nature of his submission. A model predicting 'subject' or 'accustomed' would show it has learned passive subordination rather than the Pauline emphasis on Christ's voluntary descent. 
Correct prediction demonstrates the model has encoded the Philippians 2 self-humiliation sequence.", "surface_confounder": "subject" }, { "id": "DOC_184", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "He who began a good [MASK] in you will bring it to completion at the day of Jesus Christ.", "targets": [ "work" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "time", "life", "thing", "day", "plan" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Philippians 1:6", "reasoning": "Philippians 1:6 grounds the doctrine of perseverance of the saints in God's initiating and completing activity. 'Work' is the specific term linking divine initiative to sanctification as an ongoing project. A prediction of 'plan' or 'thing' would suggest the model has generic rather than soteriological vocabulary. Predicting 'work' shows the model has learned the Pauline pattern of God's saving activity described as ergon.", "surface_confounder": "plan" }, { "id": "DOC_185", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "Christ Jesus emptied himself, taking the [MASK] of a servant, being born in the likeness of men.", "targets": [ "form" ], "foils": [], "acceptable_alternatives": [ "role" ], "failure_examples": [ "name", "title", "body", "place", "power" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Philippians 2:7", "reasoning": "The term 'form' (morphe) in the kenosis hymn carries ontological weight \u2014 it asserts that Christ truly took on the essential nature of a servant, not merely the appearance. 'Role' is a tolerable modern synonym but lacks the metaphysical precision. 'Name' or 'title' would reveal the model has learned honorific rather than ontological christological language. 
Correct prediction shows sensitivity to the morphe vocabulary unique to the Philippians hymn.", "surface_confounder": "name" }, { "id": "DOC_186", "type": "doctrinal_association", "category": "soteriology", "difficulty": "hard", "input": "Not having a righteousness of my own that comes from the [MASK], but that which comes through faith in Christ.", "targets": [ "law" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "world", "heart", "flesh", "past", "self" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Philippians 3:9", "reasoning": "Paul's autobiographical contrast in Philippians 3 sets law-righteousness against faith-righteousness. 'Law' is the precise antithesis to faith in Paul's soteriology. A model predicting 'flesh' or 'self' reveals generic moral dualism rather than the specific Pauline law-gospel contrast. Predicting 'law' in top-3 demonstrates that the model has learned the tight covenantal framework that structures Paul's argument about his former pharisaic confidence.", "surface_confounder": "flesh" }, { "id": "DOC_187", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "medium", "input": "Complete my joy by being of the same [MASK], having the same love, being in full accord.", "targets": [ "mind" ], "foils": [], "acceptable_alternatives": [ "spirit" ], "failure_examples": [ "age", "group", "place", "kind", "church" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Philippians 2:2", "reasoning": "Paul's ecclesiology of unity in Philippians centers on shared disposition and purpose, not mere organizational agreement. 'Mind' (phroneo) is the controlling term in Philippians for the attitude believers are to adopt \u2014 the same word family used in 2:5 ('have this mind'). 'Spirit' is an acceptable near-synonym in this context. 
A model predicting 'church' or 'group' would show it learned sociological rather than dispositional unity language.", "surface_confounder": "heart" }, { "id": "DOC_188", "type": "doctrinal_association", "category": "eschatology", "difficulty": "easy", "input": "But our citizenship is in [MASK], and from it we await a Savior, the Lord Jesus Christ.", "targets": [ "heaven" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "rome", "israel", "christ", "god", "jerusalem" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Philippians 3:20", "reasoning": "The citizenship-in-heaven motif would resonate powerfully with Philippian readers, whose city was a Roman colony. 'Heaven' is the unambiguous answer given the eschatological framing of awaiting a Savior. Predicting 'jerusalem' or 'israel' would indicate the model confuses Pauline eschatological geography with Jewish nationalistic categories. Even a minimally trained model should get this given the strong contextual cues.", "surface_confounder": "jerusalem" }, { "id": "DOC_189", "type": "doctrinal_association", "category": "soteriology", "difficulty": "hard", "input": "I press on toward the [MASK] for the prize of the upward call of God in Christ Jesus.", "targets": [ "goal" ], "foils": [], "acceptable_alternatives": [ "mark" ], "failure_examples": [ "end", "light", "cross", "truth", "hope" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Philippians 3:14", "reasoning": "Paul uses athletic imagery in Philippians 3:13-14 to describe sanctification as a forward-pressing race. 'Goal' (skopos) is the specific term \u2014 'mark' is the KJV rendering and an acceptable variant. 'End' is too generic and misses the purposeful directionality of the metaphor. 'Cross' or 'hope' would indicate the model has learned general Christian vocabulary but not the specific athletic register of this passage. 
Top-3 prediction reveals sensitivity to Paul's metaphorical framing.", "surface_confounder": "end" }, { "id": "DOC_190", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "medium", "input": "And my God will supply every [MASK] of yours according to his riches in glory in Christ Jesus.", "targets": [ "need" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "prayer", "wish", "desire", "dream", "request" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Philippians 4:19", "reasoning": "Paul's promise of divine provision in 4:19 specifically targets needs, not wants \u2014 a critical distinction in theology proper regarding God's providential care. Predicting 'wish' or 'desire' would indicate the model has absorbed prosperity-theology framing rather than Paul's theology of contentment. 'Need' in context follows Paul's discussion of learned sufficiency in all circumstances (4:11-12), connecting divine provision to genuine necessity.", "surface_confounder": "desire" }, { "id": "CAN_114", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "I can do all [MASK] through him who strengthens me.", "targets": [ "things" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "good", "this", "works", "deeds", "that" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Philippians 4:13", "reasoning": "Philippians 4:13 is among the most frequently quoted and memorized verses in evangelical contexts. 'Things' is the canonical word in both ESV and NASB. Any model with significant biblical exposure should recall this in top-10. 
Failure would indicate minimal internalization of even the highest-frequency Philippians text.", "surface_confounder": "" }, { "id": "CAN_115", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Do not be [MASK] about anything, but in everything by prayer and supplication with thanksgiving let your requests be made known to God.", "targets": [ "anxious" ], "foils": [], "acceptable_alternatives": [ "worried" ], "failure_examples": [ "sad", "angry", "afraid", "hasty", "quiet" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Philippians 4:6", "reasoning": "Philippians 4:6 uses 'anxious' (merimnao) as the specific prohibition paired with the prayer remedy. 'Worried' is a passable modern equivalent. 'Afraid' or 'sad' would indicate the model confuses adjacent emotional states. The surrounding context \u2014 prayer, supplication, thanksgiving \u2014 should strongly activate the anxiety-to-peace semantic pathway that characterizes this passage.", "surface_confounder": "afraid" }, { "id": "CAN_116", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Work out your own [MASK] with fear and trembling.", "targets": [ "salvation" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "problems", "issues", "faith", "path", "destiny" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Philippians 2:12", "reasoning": "This verse is a key text in debates about perseverance and sanctification. 'Salvation' is the canonical word and the only theologically precise option. Predicting 'faith' would reveal a model that has learned generic Christian piety language but not the specific soteriological emphasis. 'Problems' or 'issues' would indicate complete domain failure. 
The co-occurrence of 'fear and trembling' with a soteriological object is distinctive to this passage.", "surface_confounder": "faith" }, { "id": "CAN_117", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "For to me to [MASK] is Christ, and to die is gain.", "targets": [ "live" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "pray", "serve", "hope", "love", "know" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Philippians 1:21", "reasoning": "Philippians 1:21 is one of Paul's most quoted declarations, built on a stark live-die contrast. 'Live' is the only word that creates the parallel with 'die is gain.' The antithetical parallelism is unmistakable. A model predicting 'serve' or 'pray' has learned Pauline vocabulary broadly but missed the specific literary structure of this famous aphorism.", "surface_confounder": "" }, { "id": "CAN_118", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Finally, brothers, whatever is [MASK], whatever is honorable, whatever is just, whatever is pure, think about these things.", "targets": [ "true" ], "foils": [], "acceptable_alternatives": [ "noble" ], "failure_examples": [ "good", "great", "best", "right", "holy" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Philippians 4:8", "reasoning": "Philippians 4:8 opens its virtue list with 'true' (alethes) as the first attribute. The sequence true-honorable-just-pure is canonical and memorized by many. 'Good' or 'right' are too generic and miss the specific ordering. 'Noble' appears in some translations for the second term. 
Correctly predicting 'true' as the first in the series shows the model has internalized the specific lexical sequence of this virtue catalog.", "surface_confounder": "good" }, { "id": "CAN_119", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "[MASK] in the Lord always; again I will say, rejoice!", "targets": [ "rejoice" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "believe", "trust", "hope", "rest", "remain" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Philippians 4:4", "reasoning": "Joy and rejoicing are the dominant theme of Philippians, and 4:4 is the most concentrated expression. The repetition within the verse itself ('again I will say, rejoice') provides a massive contextual cue. Any model with even basic Philippians exposure should predict 'rejoice' trivially. Failure here would be a strong signal of minimal biblical text ingestion.", "surface_confounder": "" }, { "id": "CON_156", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "Therefore God has highly [MASK] him and bestowed on him the name that is above every name.", "targets": [ "exalted" ], "foils": [ "created" ], "acceptable_alternatives": [], "failure_examples": [ "praised", "honored", "loved", "blessed", "chosen" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Philippians 2:9", "reasoning": "The contrast between 'exalted' and 'created' maps directly onto the orthodox-Arian divide. 'Exalted' presupposes pre-existence and voluntary descent \u2014 God restores Christ to the glory he willingly set aside. 'Created' would imply Christ is a made being elevated to divine status, the core Arian claim. 
A model that assigns higher probability to 'created' has absorbed adoptionist or Arian christological patterns rather than the Nicene framework embedded in the Philippians hymn.", "surface_confounder": "created" }, { "id": "CON_157", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "Not having a righteousness of my own but the righteousness from God that depends on [MASK].", "targets": [ "faith" ], "foils": [ "works" ], "acceptable_alternatives": [], "failure_examples": [ "merit", "effort", "law", "obedience", "man" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Philippians 3:9", "reasoning": "The faith-works contrast is the central axis of Pauline soteriology. In Philippians 3:9, Paul explicitly rejects self-derived righteousness in favor of a righteousness received through faith. The foil 'works' represents the legalistic error Paul has just repudiated in his autobiographical account of pharisaic confidence. A model ranking 'works' above 'faith' in this context would demonstrate inverted soteriological polarity \u2014 confusing the rejected basis with the affirmed one.", "surface_confounder": "works" }, { "id": "CON_158", "type": "contrastive_theology", "category": "theology_proper", "difficulty": "medium", "input": "For it is [MASK] who works in you, both to will and to work for his good pleasure.", "targets": [ "god" ], "foils": [ "man" ], "acceptable_alternatives": [], "failure_examples": [ "nature", "fate", "time", "reason", "law" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Philippians 2:13", "reasoning": "Philippians 2:13 is a key text for monergism \u2014 the doctrine that God is the sole efficient cause of regeneration and sanctification. The foil 'man' represents the Pelagian or semi-Pelagian error that human effort is the ultimate source of both willing and doing. 
The verse immediately follows the command to 'work out your salvation,' making the divine-human agency question acute. A model ranking 'man' above 'God' would reveal Pelagian rather than Augustinian-Reformed soteriological encoding.", "surface_confounder": "man" }, { "id": "CON_159", "type": "contrastive_theology", "category": "eschatology", "difficulty": "hard", "input": "The Lord Jesus Christ will [MASK] our lowly body to be like his glorious body.", "targets": [ "transform" ], "foils": [ "replace" ], "acceptable_alternatives": [ "change" ], "failure_examples": [ "destroy", "remove", "abandon", "judge", "hide" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Philippians 3:21", "reasoning": "The eschatological hope in Philippians 3:21 is bodily transformation, not replacement \u2014 a key distinction against Gnostic dualism that viewed the physical body as inherently evil and disposable. 'Transform' preserves continuity between the present body and the resurrection body. The foil 'replace' encodes the Gnostic error of material-spiritual discontinuity. A model favoring 'replace' has absorbed dualistic rather than incarnational eschatology.", "surface_confounder": "replace" }, { "id": "CON_160", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "Christ Jesus, who, though he was in the [MASK] of God, did not count equality with God a thing to be grasped.", "targets": [ "form" ], "foils": [ "image" ], "acceptable_alternatives": [ "nature" ], "failure_examples": [ "shadow", "name", "house", "service", "sight" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Philippians 2:6", "reasoning": "The Philippians hymn uses 'form' (morphe) to assert Christ's full ontological participation in the divine nature \u2014 not merely that he reflects or resembles God. 
The foil 'image' (eikon) carries subordinationist connotations when placed in this context: an image derives from and is lesser than its source, whereas morphe implies essential identity. A model preferring 'image' here has learned a derivative christology (Christ as God's reflection) rather than the co-equal christology the hymn asserts. 'Nature' is an acceptable doctrinal synonym.", "surface_confounder": "image" }, { "id": "CON_161", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "For it has been [MASK] to you that for the sake of Christ you should not only believe in him but also suffer for his sake.", "targets": [ "granted" ], "foils": [ "commanded" ], "acceptable_alternatives": [ "given" ], "failure_examples": [ "told", "said", "shown", "promised", "taught" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Philippians 1:29", "reasoning": "Paul frames both belief and suffering as gracious gifts \u2014 they have been 'granted' (echaristhe, from charis/grace). The foil 'commanded' reframes suffering as legal obligation rather than gracious privilege, losing the grace-theology embedded in the verb's root. A model ranking 'commanded' above 'granted' has learned an imperatival rather than gift-based theology of suffering. 
The correct prediction shows the model has encoded the Pauline connection between grace and the believer's experience of suffering.", "surface_confounder": "commanded" }, { "id": "DOC_191", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "Therefore, since we have been [MASK] by faith, we have peace with God through our Lord Jesus Christ.", "targets": [ "justified" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "condemned", "punished", "abandoned", "saved", "tested" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 5:1", "reasoning": "Justification by faith is the central forensic declaration of Pauline soteriology. The co-occurrence of 'by faith' and 'peace with God' should activate 'justified' specifically rather than the generic 'saved.' A model predicting 'saved' reveals soteriological domain awareness but lacks ordo salutis precision. 'Condemned' in top-3 would indicate inverted doctrinal polarity.", "surface_confounder": "saved" }, { "id": "DOC_192", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "easy", "input": "For all have [MASK] and fall short of the glory of God.", "targets": [ "sinned" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "risen", "lived", "worked", "spoken", "stood" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Romans 3:23", "reasoning": "Romans 3:23 is one of the highest-frequency verses in evangelical preaching corpora and anchors the doctrine of universal depravity. 'Sinned' is the only theologically coherent completion given the 'fall short of the glory' context. 
Failure to predict this in top-10 indicates the model has not internalized basic hamartiological vocabulary.", "surface_confounder": "" }, { "id": "DOC_193", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "And those whom he predestined he also called, and those whom he called he also [MASK].", "targets": [ "justified" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "condemned", "rejected", "tested", "healed", "chosen" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 8:30", "reasoning": "The golden chain of Romans 8:29\u201330 follows a fixed ordo: foreknew \u2192 predestined \u2192 called \u2192 justified \u2192 glorified. Predicting 'justified' after 'called' tests whether the model has encoded this specific soteriological sequence. 'Chosen' is a surface confounder because election language is adjacent but disrupts the chain's sequential logic.", "surface_confounder": "chosen" }, { "id": "DOC_194", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "medium", "input": "For the [MASK] of God is revealed from heaven against all ungodliness and unrighteousness of men.", "targets": [ "wrath" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "love", "peace", "joy", "blessing", "hand" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 1:18", "reasoning": "Romans 1:18 introduces divine wrath as the necessary backdrop to the gospel. 'Wrath' is constrained by 'revealed from heaven against ungodliness.' A model predicting 'love' here would reveal a sentimentalized theology that cannot hold wrath and love in proper tension. 
The specificity of 'against all ungodliness' eliminates softer divine attributes.", "surface_confounder": "glory" }, { "id": "DOC_195", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "medium", "input": "I appeal to you therefore, brothers, to present your [MASK] as a living sacrifice, holy and acceptable to God.", "targets": [ "bodies" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "money", "gifts", "prayers", "children", "works" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 12:1", "reasoning": "Paul's exhortation to present 'bodies' as a living sacrifice bridges indicative theology (Romans 1\u201311) and imperative ethics (Romans 12\u201316). The term 'bodies' is theologically precise \u2014 it grounds Christian worship in embodied obedience, not mere intellectual assent. 'Souls' or 'hearts' would reflect a gnostic spiritualizing tendency that Paul intentionally counters.", "surface_confounder": "hearts" }, { "id": "DOC_196", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "hard", "input": "So then he has [MASK] on whomever he wills, and he hardens whomever he wills.", "targets": [ "mercy" ], "foils": [], "acceptable_alternatives": [ "compassion" ], "failure_examples": [ "wrath", "judgment", "anger", "power", "glory" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Romans 9:18", "reasoning": "Romans 9:18 pairs divine mercy with divine hardening as twin expressions of sovereignty. The parallel structure ('mercy on whomever... hardens whomever') makes this a test of whether the model encodes the asymmetric mercy/hardening pair. 
Predicting 'wrath' instead of 'mercy' collapses the deliberate contrast Paul constructs between positive and negative sovereignty.", "surface_confounder": "judgment" }, { "id": "DOC_197", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "medium", "input": "Likewise the [MASK] helps us in our weakness, for we do not know what to pray for as we ought.", "targets": [ "spirit" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "law", "father", "church", "prophet", "angel" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 8:26", "reasoning": "Romans 8:26 attributes intercessory prayer assistance to the Spirit. The context of weakness and not knowing how to pray uniquely constrains the agent to the Holy Spirit in Pauline pneumatology. A model predicting 'church' or 'father' reveals failure to encode the Spirit's specific intercessory role in Romans 8.", "surface_confounder": "" }, { "id": "DOC_198", "type": "doctrinal_association", "category": "soteriology", "difficulty": "easy", "input": "If you confess with your mouth that Jesus is [MASK] and believe in your heart that God raised him from the dead, you will be saved.", "targets": [ "lord" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "good", "real", "alive", "coming", "king" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Romans 10:9", "reasoning": "The confession 'Jesus is Lord' is the earliest Christian creed and the heart of Romans 10:9. 'Lord' (kyrios) carries divine ontological weight \u2014 it is the LXX rendering of YHWH. A model that predicts 'king' or 'god' reveals adjacent but imprecise christological encoding. 
This is high-frequency and should be easy for any biblically trained model.", "surface_confounder": "christ" }, { "id": "DOC_199", "type": "doctrinal_association", "category": "soteriology", "difficulty": "hard", "input": "And to the one who does not work but believes in him who justifies the [MASK], his faith is counted as righteousness.", "targets": [ "ungodly" ], "foils": [], "acceptable_alternatives": [ "wicked" ], "failure_examples": [ "righteous", "faithful", "worthy", "holy", "elect" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Romans 4:5", "reasoning": "Romans 4:5 is a hard soteriological test because it contains the scandalous Pauline claim that God justifies the ungodly \u2014 not the righteous. A model that predicts 'righteous' or 'faithful' has inverted the logic of grace and reveals works-righteousness encoding. Only 'ungodly' captures the radical nature of justification sola gratia.", "surface_confounder": "righteous" }, { "id": "DOC_200", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "medium", "input": "Therefore, just as sin came into the world through one [MASK], and death through sin, so death spread to all men.", "targets": [ "man" ], "foils": [], "acceptable_alternatives": [ "person" ], "failure_examples": [ "nation", "angel", "woman", "choice", "law" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 5:12", "reasoning": "Romans 5:12 establishes federal headship \u2014 sin entered through one man (Adam). This is the foundation of original sin and the Adam-Christ typology that structures Romans 5:12\u201321. 
A model predicting 'woman' reveals confusion about Pauline federal theology, while 'choice' would indicate Pelagian encoding where sin originates in individual decisions rather than representative headship.", "surface_confounder": "woman" }, { "id": "DOC_201", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "hard", "input": "For you did not receive the spirit of slavery to fall back into fear, but you have received the spirit of [MASK].", "targets": [ "adoption" ], "foils": [], "acceptable_alternatives": [ "sonship" ], "failure_examples": [ "freedom", "power", "love", "truth", "peace" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Romans 8:15", "reasoning": "Paul contrasts the spirit of slavery/fear with the spirit of adoption (huiothesia). 'Adoption' is a precise legal-theological term denoting the bestowal of full filial rights \u2014 distinct from the general concept of 'freedom.' A model predicting 'freedom' captures the slavery contrast but misses the relational and legal specificity that drives Paul's argument toward 'Abba, Father' in the same verse.", "surface_confounder": "freedom" }, { "id": "DOC_202", "type": "doctrinal_association", "category": "soteriology", "difficulty": "easy", "input": "For the [MASK] of sin is death, but the free gift of God is eternal life in Christ Jesus our Lord.", "targets": [ "wages" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "gift", "fruit", "power", "joy", "way" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Romans 6:23", "reasoning": "Romans 6:23 is a core Romans Road verse with extremely high frequency in evangelistic contexts. The 'wages' / 'gift' contrast encodes the earned-versus-free distinction central to grace theology. 
'Fruit' is a subtle confounder from Romans 6:21 but breaks the economic metaphor Paul deploys here.", "surface_confounder": "fruit" }, { "id": "DOC_203", "type": "doctrinal_association", "category": "christology", "difficulty": "medium", "input": "But God shows his love for us in that while we were still [MASK], Christ died for us.", "targets": [ "sinners" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "saints", "righteous", "worthy", "living", "faithful" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 5:8", "reasoning": "Romans 5:8 emphasizes the counter-intuitive timing and object of Christ's atoning death \u2014 'while we were still sinners.' The word 'sinners' is constrained by 'while we were still,' which signals a pre-conversion state. Predicting 'saints' or 'righteous' would invert the logic entirely and indicate the model has not grasped substitutionary atonement's target.", "surface_confounder": "weak" }, { "id": "DOC_204", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "hard", "input": "Do not be [MASK] to this world, but be transformed by the renewal of your mind.", "targets": [ "conformed" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "attached", "drawn", "lost", "bound", "given" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Romans 12:2", "reasoning": "The conformed/transformed pair in Romans 12:2 uses two distinct Greek compounds (syschematizo / metamorphoo) to contrast external pattern-matching with internal renewal. 'Conformed' is the precise English rendering of the negative command. 
A model predicting 'attached' or 'bound' captures the separation idea but misses the morphological metaphor Paul intentionally constructs.", "surface_confounder": "attached" }, { "id": "CAN_120", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "For I am not [MASK] of the gospel, for it is the power of God for salvation to everyone who believes.", "targets": [ "ashamed" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "afraid", "tired", "worthy", "sure", "part" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Romans 1:16", "reasoning": "Romans 1:16 is Paul's thesis statement for the entire epistle. 'Ashamed' is the canonical term in ESV/NASB/KJV. 'Afraid' is a common surface confounder that shares the emotional domain but does not capture the social-honor dynamics that 'ashamed' carries in Greco-Roman culture.", "surface_confounder": "afraid" }, { "id": "CAN_121", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "And we know that for those who love God all things work together for [MASK].", "targets": [ "good" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "glory", "nothing", "evil", "ever", "god" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Romans 8:28", "reasoning": "Romans 8:28 is among the most memorized verses in the New Testament. 'Good' is the unambiguous canonical target. The simplicity of the word makes this an easy baseline \u2014 any model with meaningful biblical exposure should predict it. 
'Glory' is a plausible theological word but is not the canonical term here.", "surface_confounder": "glory" }, { "id": "CAN_122", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Do not be overcome by evil, but overcome evil with [MASK].", "targets": [ "good" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "force", "power", "prayer", "love", "truth" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 12:21", "reasoning": "The evil/good contrast in Romans 12:21 tests whether the model has encoded the specific Pauline ethical vocabulary. 'Love' is a strong surface confounder because it summarizes the broader ethic of Romans 12\u201313, but the canonical word is 'good.' This tests precise word recall over thematic summarization.", "surface_confounder": "love" }, { "id": "CAN_123", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "There is therefore now no [MASK] for those who are in Christ Jesus.", "targets": [ "condemnation" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "salvation", "hope", "law", "peace", "sin" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 8:1", "reasoning": "Romans 8:1 is the triumphant opening of the Spirit-life chapter. 'Condemnation' is constrained by 'no ___ for those in Christ.' The negation structure ('no ___') requires a negative concept being negated. Predicting 'salvation' or 'hope' would invert the double-negative logic. 
This tests the model's sensitivity to negation scope in theological discourse.", "surface_confounder": "" }, { "id": "CAN_124", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "So [MASK] comes from hearing, and hearing through the word of Christ.", "targets": [ "faith" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "truth", "wisdom", "power", "grace", "love" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 10:17", "reasoning": "Romans 10:17 defines the instrumental means by which faith arises \u2014 through hearing the proclaimed word. The subject position ('___ comes from hearing') requires a noun that is produced by auditory reception of the gospel. 'Faith' is the only theologically and contextually appropriate completion. 'Wisdom' and 'truth' are generically plausible but do not match the Romans 10 preaching-faith-calling chain.", "surface_confounder": "salvation" }, { "id": "CAN_125", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Oh, the [MASK] of the riches and wisdom and knowledge of God!", "targets": [ "depth" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "power", "glory", "beauty", "grace", "wonder" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Romans 11:33", "reasoning": "Romans 11:33 opens the great doxology closing the doctrinal section. 'Depth' (bathos) is the exact canonical word. It is surprising because exclamatory contexts in worship language tend toward 'glory' or 'greatness.' 
A model predicting 'depth' demonstrates it has encoded this specific doxological formula rather than defaulting to generic praise vocabulary.", "surface_confounder": "glory" }, { "id": "CAN_126", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "In order that the righteous requirement of the law might be [MASK] in us, who walk not according to the flesh but according to the Spirit.", "targets": [ "fulfilled" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "abolished", "removed", "written", "broken", "changed" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Romans 8:4", "reasoning": "Romans 8:4 teaches that the law's requirement is fulfilled (not abolished) in Spirit-led believers. This is theologically precise: Paul affirms the law's ongoing moral standard while relocating the power of obedience from flesh to Spirit. 'Abolished' reflects a common misreading of Pauline law theology. 'Written' confuses this with the Jeremiah 31 / Hebrews 8 new covenant formula.", "surface_confounder": "written" }, { "id": "CAN_127", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "For I am sure that neither death nor [MASK], nor angels nor rulers, nor things present nor things to come, shall be able to separate us from the love of God.", "targets": [ "life" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "sin", "fear", "evil", "hell", "pain" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Romans 8:38", "reasoning": "The 'neither death nor life' merism in Romans 8:38 is one of the great rhetorical structures in Paul. The pairing is a totality formula \u2014 it encompasses all possible states of existence. 
A model predicting 'sin' or 'fear' reveals it has defaulted to thematically negative words rather than encoding the specific merism pattern where the positive term ('life') is also included as unable to separate.", "surface_confounder": "" }, { "id": "CON_162", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "For we hold that one is justified by [MASK] apart from works of the law.", "targets": [ "faith" ], "foils": [ "works" ], "acceptable_alternatives": [ "grace" ], "failure_examples": [ "merit", "effort", "obedience", "law" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 3:28", "reasoning": "Romans 3:28 is the locus classicus for sola fide. The foil 'works' tests whether the model has encoded the Pauline faith/works antithesis or collapses them. Despite 'works' appearing in the same sentence, the mask position requires 'faith' as the positive instrument of justification. A model ranking 'works' above 'faith' reveals a fundamental failure in Reformation soteriology encoding.", "surface_confounder": "works" }, { "id": "CON_163", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "But if it is by [MASK], it is no longer on the basis of works; otherwise grace would no longer be grace.", "targets": [ "grace" ], "foils": [ "works" ], "acceptable_alternatives": [], "failure_examples": [ "law", "merit", "effort", "nature" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 11:6", "reasoning": "Romans 11:6 states the logical incompatibility of grace and works as grounds for election. The foil 'works' is especially tricky because the word itself appears later in the sentence, while the target 'grace' appears twice more. The model must resolve the antithetical structure: 'if by ___, no longer works.' 
Ranking 'works' above 'grace' here would mean the model cannot parse the either/or structure of the grace-works antithesis.", "surface_confounder": "works" }, { "id": "CON_164", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "Now to the one who works, his wages are not counted as a [MASK] but as his due.", "targets": [ "gift" ], "foils": [ "debt" ], "acceptable_alternatives": [ "favor" ], "failure_examples": [ "wage", "right", "reward", "blessing" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 4:4", "reasoning": "Romans 4:4 sets up the contrast between wages earned and grace given freely. The foil 'debt' is conceptually adjacent \u2014 wages create an obligation \u2014 but the text explicitly says wages are NOT counted as a gift but as something owed. 'Gift' captures what grace-based reckoning looks like, while 'debt' would reverse the semantic direction Paul is establishing.", "surface_confounder": "debt" }, { "id": "CON_165", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "For the righteousness of God has been manifested apart from the [MASK], although the Law and the Prophets bear witness to it.", "targets": [ "law" ], "foils": [ "gospel" ], "acceptable_alternatives": [], "failure_examples": [ "world", "church", "spirit", "cross" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 3:21", "reasoning": "Romans 3:21 announces that God's righteousness is revealed independently of law-keeping while still attested by the Old Testament. The foil 'gospel' tests whether the model confuses the instrument of revelation (the gospel reveals righteousness) with what righteousness is revealed apart from (the law). 
This distinction is central to Paul's law-gospel hermeneutic.", "surface_confounder": "gospel" }, { "id": "CON_166", "type": "contrastive_theology", "category": "christology", "difficulty": "medium", "input": "Who was declared to be the Son of God in [MASK] according to the Spirit of holiness by his resurrection from the dead.", "targets": [ "power" ], "foils": [ "weakness" ], "acceptable_alternatives": [], "failure_examples": [ "secret", "name", "truth", "time" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 1:4", "reasoning": "Romans 1:4 declares Christ the Son of God 'in power' through resurrection. The foil 'weakness' inverts the resurrection declaration \u2014 Paul elsewhere uses weakness language for the cross (2 Corinthians 13:4), but the resurrection is explicitly the demonstration of power. A model ranking 'weakness' higher reveals confusion between crucifixion theology and resurrection theology.", "surface_confounder": "weakness" }, { "id": "CON_167", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "For the free [MASK] of God is eternal life in Christ Jesus our Lord.", "targets": [ "gift" ], "foils": [ "wage" ], "acceptable_alternatives": [], "failure_examples": [ "will", "act", "hand", "word" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 6:23b", "reasoning": "The second half of Romans 6:23 deliberately contrasts 'wages of sin' with 'free gift of God.' The foil 'wage' tests whether the model keeps the economic metaphor straight: wages are earned (sin \u2192 death), gifts are free (grace \u2192 life). 
A model ranking 'wage' above 'gift' has collapsed the works/grace polarity that structures the entire verse.", "surface_confounder": "wage" }, { "id": "CON_168", "type": "contrastive_theology", "category": "hamartiology", "difficulty": "medium", "input": "For those who live according to the [MASK] set their minds on the things of the Spirit.", "targets": [ "spirit" ], "foils": [ "flesh" ], "acceptable_alternatives": [], "failure_examples": [ "law", "world", "mind", "heart" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 8:5", "reasoning": "Romans 8:4\u20135 establishes the flesh/Spirit antithesis that governs the Christian life. The foil 'flesh' exploits the fact that Paul discusses both groups in adjacent verses. The phrase 'set their minds on the things of the Spirit' should constrain the subject to Spirit-walkers, not flesh-walkers. A model confused by the proximity of both terms may rank 'flesh' higher.", "surface_confounder": "flesh" }, { "id": "CON_169", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "For Christ is the end of the [MASK] for righteousness to everyone who believes.", "targets": [ "law" ], "foils": [ "faith" ], "acceptable_alternatives": [], "failure_examples": [ "world", "age", "sin", "road" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 10:4", "reasoning": "Romans 10:4 declares Christ the telos of the law. The foil 'faith' tests whether the model confuses what Christ terminates/fulfills (the law as a path to righteousness) with the instrument through which righteousness comes (faith). 'Faith' is thematically proximate in Romans 10 but structurally wrong in this sentence. 
This is a hard test of law-gospel precision.", "surface_confounder": "faith" }, { "id": "CON_170", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "Therefore, as one trespass led to [MASK] for all men, so one act of righteousness leads to justification and life for all men.", "targets": [ "condemnation" ], "foils": [ "salvation" ], "acceptable_alternatives": [], "failure_examples": [ "blessing", "peace", "freedom", "glory" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 5:18", "reasoning": "Romans 5:18 contrasts Adam's trespass (\u2192 condemnation) with Christ's righteousness (\u2192 justification). The foil 'salvation' is plausible because the verse concerns soteriological outcomes, but the first clause specifically describes the negative result of Adam's sin. 'Salvation' belongs to the second clause. This tests whether the model tracks the Adam/Christ typological contrast within a single verse.", "surface_confounder": "salvation" }, { "id": "CON_171", "type": "contrastive_theology", "category": "eschatology", "difficulty": "medium", "input": "For if we have been united with him in a death like his, we shall certainly be united with him in a [MASK] like his.", "targets": [ "resurrection" ], "foils": [ "death" ], "acceptable_alternatives": [], "failure_examples": [ "dream", "vision", "journey", "way" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 6:5", "reasoning": "Romans 6:5 pairs union with Christ's death and union with his resurrection. The foil 'death' tests whether the model resolves the death\u2192resurrection sequence or loops back to 'death' because of its prominence in the context. The 'if... certainly' structure signals an escalation from death to something beyond it. 
A model predicting 'death' reveals a flat reading that misses the sequential logic.", "surface_confounder": "death" }, { "id": "CON_172", "type": "contrastive_theology", "category": "romans_road", "difficulty": "medium", "input": "For God has done what the law, weakened by the [MASK], could not do.", "targets": [ "flesh" ], "foils": [ "spirit" ], "acceptable_alternatives": [], "failure_examples": [ "world", "church", "heart", "mind" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 8:3", "reasoning": "Romans 8:3 identifies the flesh as the reason the law was powerless. The foil 'spirit' is a strong confounder because the flesh/Spirit pair dominates Romans 8 and a model might over-activate 'spirit' from context. But here the law's weakness is specifically located in fallen human nature ('flesh'), not in the Spirit. Ranking 'spirit' above 'flesh' would invert the anthropological problem Paul diagnoses.", "surface_confounder": "spirit" }, { "id": "CON_173", "type": "contrastive_theology", "category": "romans_road", "difficulty": "hard", "input": "For those whom he [MASK] he also predestined to be conformed to the image of his Son.", "targets": [ "foreknew" ], "foils": [ "loved" ], "acceptable_alternatives": [ "chose" ], "failure_examples": [ "created", "saved", "blessed", "heard" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Romans 8:29", "reasoning": "Romans 8:29 begins the golden chain with 'foreknew' (proginosko), a term carrying specific Pauline weight beyond mere prior knowledge \u2014 it implies relational foreordination. The foil 'loved' is theologically close (many Reformed exegetes gloss foreknowledge as fore-love) but is not the canonical term. 
A model predicting 'loved' over 'foreknew' reveals synonymic approximation rather than precise canonical encoding of the ordo salutis sequence.", "surface_confounder": "loved" }, { "id": "DOC_205", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "Christ Jesus came into the world to [MASK] sinners, of whom I am the foremost.", "targets": [ "save" ], "foils": [], "acceptable_alternatives": [ "redeem" ], "failure_examples": [ "judge", "condemn", "punish", "destroy", "find" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Timothy 1:15", "reasoning": "This is one of Paul's 'faithful sayings' and the core soteriological claim of 1 Timothy. The surrounding context \u2014 'sinners' and 'foremost' \u2014 should strongly activate the salvation domain. A model that predicts 'judge' or 'condemn' has inverted the soteriological polarity of Christ's mission. 'Redeem' is acceptable as a near synonym but 'save' is the canonical term.", "surface_confounder": "find" }, { "id": "DOC_206", "type": "doctrinal_association", "category": "hamartiology", "difficulty": "medium", "input": "For the love of [MASK] is a root of all kinds of evil.", "targets": [ "money" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "sin", "power", "evil", "god", "life" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Timothy 6:10", "reasoning": "This verse is one of the most frequently quoted from the pastoral epistles. The construction 'love of X is a root of all kinds of evil' uniquely identifies money/wealth in Pauline ethics. A model predicting 'sin' reveals circular reasoning (love of sin causes evil), while 'power' would indicate generic ethical reasoning rather than biblical specificity. 
The surface form 'love of X' could also attract 'god' which would be a critical doctrinal inversion.", "surface_confounder": "power" }, { "id": "DOC_207", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "medium", "input": "For God gave us a spirit not of fear but of [MASK] and love and self-control.", "targets": [ "power" ], "foils": [], "acceptable_alternatives": [ "strength" ], "failure_examples": [ "peace", "hope", "joy", "wisdom", "grace" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "2 Timothy 1:7", "reasoning": "The triadic formula 'power, love, self-control' is a distinctive Pauline description of the Spirit's work in 2 Timothy. 'Power' is the first element contrasted with fear. A model predicting 'peace' or 'joy' shows it has learned generic fruit-of-the-Spirit vocabulary but not the specific 2 Timothy pneumatological formula. The three-part list structure should help disambiguate for a well-trained model.", "surface_confounder": "peace" }, { "id": "DOC_208", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "He saved us and [MASK] us to a holy calling, not because of our works but because of his own purpose and grace.", "targets": [ "called" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "led", "brought", "taught", "gave", "moved" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "2 Timothy 1:9", "reasoning": "The pairing of 'saved' and 'called' is central to the ordo salutis in Reformed soteriology. Paul's usage in 2 Timothy links effectual calling with salvation as coordinate divine acts. A model that predicts 'led' or 'brought' grasps the directional aspect but misses the technical theological vocabulary. 
Predicting 'called' demonstrates the model has learned the saved-and-called collocation from Pauline corpus.", "surface_confounder": "led" }, { "id": "DOC_209", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "easy", "input": "If anyone aspires to the office of [MASK], he desires a noble task.", "targets": [ "bishop" ], "foils": [], "acceptable_alternatives": [ "elder", "overseer" ], "failure_examples": [ "priest", "prophet", "king", "deacon", "teacher" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "1 Timothy 3:1", "reasoning": "This verse opens Paul's qualifications for church leadership. The KJV/traditional term 'bishop' (episkopos) and its equivalents 'overseer' and 'elder' are the correct answers. 'Deacon' is a surface confounder since deacon qualifications follow immediately in 1 Timothy 3:8, but the 'noble task' framing specifically introduces the overseer. 'Priest' would indicate a model has imported non-Protestant ecclesiological categories.", "surface_confounder": "deacon" }, { "id": "DOC_210", "type": "doctrinal_association", "category": "soteriology", "difficulty": "easy", "input": "Fight the good fight of [MASK], take hold of the eternal life to which you were called.", "targets": [ "faith" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "war", "god", "truth", "hope", "love" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "1 Timothy 6:12", "reasoning": "The 'fight of faith' is a signature Pauline exhortation in the pastoral epistles. The collocation 'fight of faith' plus 'eternal life' and 'called' creates a strong soteriological context. 'Truth' is a surface confounder since Paul frequently exhorts Timothy to guard truth, but the canonical phrase is specifically 'fight of faith.' 
A model predicting 'war' shows it grasps the martial metaphor but lacks biblical precision.", "surface_confounder": "truth" }, { "id": "DOC_211", "type": "doctrinal_association", "category": "soteriology", "difficulty": "hard", "input": "Paul described himself as the [MASK] of sinners, yet he received mercy so that Christ might display his perfect patience as an example.", "targets": [ "chief" ], "foils": [], "acceptable_alternatives": [ "worst", "foremost" ], "failure_examples": [ "least", "greatest", "first", "last", "leader" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "1 Timothy 1:15-16", "reasoning": "The KJV phrase 'chief of sinners' is a deeply embedded evangelical idiom. The ESV/NASB use 'foremost' which is multi-wordpiece but theologically equivalent. A model predicting 'least' inverts Paul's self-assessment. 'Greatest' is semantically close but lacks the specific biblical register. This tests whether the model has internalized the traditional English phrasing of Paul's radical self-identification in 1 Timothy.", "surface_confounder": "greatest" }, { "id": "DOC_212", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "hard", "input": "Paul described the church of the living God as the pillar and [MASK] of the truth.", "targets": [ "ground" ], "foils": [], "acceptable_alternatives": [ "foundation", "support" ], "failure_examples": [ "source", "light", "keeper", "voice", "sword" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "1 Timothy 3:15", "reasoning": "The KJV renders this as 'pillar and ground of the truth,' while the ESV uses 'pillar and buttress.' The church's role as upholder \u2014 not originator \u2014 of truth is a key ecclesiological distinction. A model predicting 'source' confuses the church's supportive role with an authoritative one, which is a significant Protestant-Catholic dividing line. 
'Ground' or 'foundation' demonstrates the model has learned the supportive metaphor.", "surface_confounder": "source" }, { "id": "DOC_213", "type": "doctrinal_association", "category": "bibliology", "difficulty": "easy", "input": "All Scripture is breathed out by God and [MASK] for teaching, for reproof, for correction.", "targets": [ "profitable" ], "foils": [], "acceptable_alternatives": [ "useful", "good" ], "failure_examples": [ "required", "designed", "written", "intended", "sacred" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "2 Timothy 3:16", "reasoning": "2 Timothy 3:16 is the locus classicus for the doctrine of biblical inspiration. The word 'profitable' (or 'useful') follows the inspiration claim and establishes Scripture's functional authority. A model predicting 'written' would collapse the inspiration claim into a mere authorship statement. 'Sacred' would conflate the result of inspiration with the utility claim that follows it in the verse.", "surface_confounder": "written" }, { "id": "DOC_214", "type": "doctrinal_association", "category": "bibliology", "difficulty": "hard", "input": "Paul urged Timothy to [MASK] the good deposit entrusted to him by the Holy Spirit who dwells within us.", "targets": [ "guard" ], "foils": [], "acceptable_alternatives": [ "keep", "protect" ], "failure_examples": [ "spread", "share", "teach", "preach", "use" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "2 Timothy 1:14", "reasoning": "The imperative to 'guard the deposit' is Paul's charge to Timothy regarding apostolic teaching. The preservation metaphor \u2014 guard, not spread or teach \u2014 indicates the stewardship responsibility for doctrinal integrity. A model predicting 'spread' or 'preach' captures the Great Commission register but misses the specific custodial emphasis of the pastoral epistles. 
'Guard' demonstrates the model has encoded the deposit-of-faith preservation language.", "surface_confounder": "spread" }, { "id": "CAN_128", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "I have fought the good fight, I have finished the [MASK], I have kept the faith.", "targets": [ "race" ], "foils": [], "acceptable_alternatives": [ "course" ], "failure_examples": [ "war", "battle", "work", "task", "journey" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "2 Timothy 4:7", "reasoning": "This is Paul's valedictory statement and one of the most quoted verses from the pastoral epistles. The triadic structure \u2014 fight, race, faith \u2014 is highly distinctive. 'Course' is the KJV rendering and equally valid. A model that predicts 'war' or 'battle' shows contamination from the adjacent 'fight' metaphor without attending to the shift in athletic imagery.", "surface_confounder": "battle" }, { "id": "CAN_129", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Have nothing to do with irreverent, silly myths. Rather, train yourself for [MASK].", "targets": [ "godliness" ], "foils": [], "acceptable_alternatives": [ "piety" ], "failure_examples": [ "holiness", "service", "battle", "heaven", "wisdom" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Timothy 4:7", "reasoning": "The exhortation to 'train yourself for godliness' is a key pastoral imperative. 'Godliness' (eusebeia) is a signature term of the pastoral epistles, with 10 of its 15 New Testament occurrences found in 1-2 Timothy and Titus. 'Holiness' is a plausible theological synonym but not the canonical word. 
A model predicting the exact term demonstrates it has encoded the pastoral epistles' distinctive vocabulary rather than generic piety language.", "surface_confounder": "holiness" }, { "id": "CAN_130", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "Henceforth there is laid up for me the [MASK] of righteousness, which the Lord, the righteous judge, will award to me on that day.", "targets": [ "crown" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "gift", "reward", "prize", "robe", "seal" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "2 Timothy 4:8", "reasoning": "The 'crown of righteousness' is a well-known eschatological reward image. The construction 'X of righteousness' with 'award' and 'judge' strongly constrains the answer. A model predicting 'gift' misses the competition/reward metaphor that Paul sustains from the fight-and-race imagery of the preceding verse. 'Crown' is a single token and appears in multiple NT reward passages.", "surface_confounder": "reward" }, { "id": "CAN_131", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "Do your best to present yourself to God as one approved, a worker who has no need to be [MASK], rightly handling the word of truth.", "targets": [ "ashamed" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "afraid", "worried", "corrected", "tested", "judged" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "2 Timothy 2:15", "reasoning": "The phrase 'worker who has no need to be ashamed' is a signature pastoral exhortation. The co-occurrence of 'approved,' 'worker,' and 'rightly handling the word of truth' creates a strong canonical context. 'Afraid' is a surface confounder that shares the emotional-state slot but lacks the honor/shame framework Paul employs. 
A correct prediction shows the model has learned the shame-avoidance motivational structure of pastoral exhortation.", "surface_confounder": "afraid" }, { "id": "CAN_132", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "But godliness with [MASK] is great gain.", "targets": [ "contentment" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "wealth", "wisdom", "patience", "peace", "joy" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Timothy 6:6", "reasoning": "This verse introduces Paul's critique of prosperity-driven religion. The pairing of 'godliness' with 'contentment' rather than 'wealth' is the theological point \u2014 the great gain is not material. A model predicting 'wealth' has inverted the passage's message. 'Peace' or 'joy' are plausible generic Christian virtue terms but miss the specific economic-contentment argument Paul makes in the surrounding verses about those who desire to be rich.", "surface_confounder": "wealth" }, { "id": "CAN_133", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "hard", "input": "By the Holy Spirit who dwells within us, guard the good [MASK] entrusted to you.", "targets": [ "deposit" ], "foils": [], "acceptable_alternatives": [ "treasure" ], "failure_examples": [ "news", "word", "faith", "gift", "message" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "2 Timothy 1:14", "reasoning": "The term 'deposit' (paratheke) is a technical Pauline term for the body of apostolic teaching entrusted to Timothy. This is harder than it appears because 'word,' 'faith,' 'news,' and 'message' all fit the slot generically but miss the specific custodial metaphor. 'Deposit' implies something received and preserved intact, which is central to the pastoral epistles' concern with doctrinal fidelity. 
A correct prediction at top-3 shows the model has encoded this specialized vocabulary.", "surface_confounder": "word" }, { "id": "CAN_134", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "The saying is trustworthy and deserving of full [MASK]: Christ Jesus came into the world to save sinners.", "targets": [ "acceptance" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "praise", "glory", "honor", "faith", "trust" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "1 Timothy 1:15", "reasoning": "The 'faithful saying' formula is unique to the pastoral epistles and appears five times. The specific phrase 'deserving of full acceptance' is the ESV/NASB rendering. 'Praise' and 'glory' are worship-register words that could fill the slot in generic Christian discourse but miss the epistemic-endorsement function of 'acceptance.' A correct prediction shows the model has learned this distinctive pastoral formula.", "surface_confounder": "praise" }, { "id": "CON_174", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "For there is one God, and there is one [MASK] between God and men, the man Christ Jesus.", "targets": [ "mediator" ], "foils": [ "priest" ], "acceptable_alternatives": [], "failure_examples": [ "bridge", "link", "path", "lord", "judge" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 Timothy 2:5", "reasoning": "The term 'mediator' (mesites) in 1 Timothy 2:5 is Paul's specific christological designation. The foil 'priest' is drawn from the Hebrews christology where Christ is called high priest. A model that ranks 'priest' above 'mediator' has blended the pastoral epistles' mediation vocabulary with Hebrews' priestly vocabulary. 
This distinction matters because 'mediator' emphasizes the go-between role while 'priest' emphasizes sacrificial function \u2014 both true but the Pauline pastoral term is 'mediator.'", "surface_confounder": "priest" }, { "id": "CON_175", "type": "contrastive_theology", "category": "pneumatology", "difficulty": "hard", "input": "God gave us a spirit not of [MASK] but of power and love and self-control.", "targets": [ "fear" ], "foils": [ "weakness" ], "acceptable_alternatives": [ "timidity" ], "failure_examples": [ "sin", "death", "hate", "doubt", "pride" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Timothy 1:7", "reasoning": "The canonical contrast in 2 Timothy 1:7 is fear versus power-love-self-control. The foil 'weakness' is a strong confounder because Paul explicitly contrasts with 'power,' making 'weakness' the logical antonym. However, the text says 'fear' (deilia), which is psychologically specific \u2014 it is about cowardice in the face of persecution, not general inadequacy. A model ranking 'weakness' above 'fear' has reasoned by antonym-matching rather than canonical recall.", "surface_confounder": "weakness" }, { "id": "CON_176", "type": "contrastive_theology", "category": "hamartiology", "difficulty": "medium", "input": "In the last days people will be lovers of self, lovers of [MASK], proud, arrogant, abusive.", "targets": [ "money" ], "foils": [ "pleasure" ], "acceptable_alternatives": [], "failure_examples": [ "sin", "power", "evil", "fame", "self" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Timothy 3:2", "reasoning": "Paul's vice list in 2 Timothy 3:2-4 contains both 'lovers of money' (v.2) and 'lovers of pleasure' (v.4). The foil 'pleasure' is drawn from the same passage but appears two verses later. A model that ranks 'pleasure' above 'money' in this position shows it has learned the vice-list domain but cannot disambiguate positional vocabulary within the list. 
The 'lovers of self, lovers of X' sequential pattern should activate the verse-2 term 'money' specifically.", "surface_confounder": "pleasure" }, { "id": "CON_177", "type": "contrastive_theology", "category": "bibliology", "difficulty": "hard", "input": "All Scripture is [MASK] by God and profitable for teaching, for reproof, for correction, and for training in righteousness.", "targets": [ "inspired" ], "foils": [ "approved" ], "acceptable_alternatives": [ "given" ], "failure_examples": [ "written", "read", "spoken", "revealed", "created" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Timothy 3:16", "reasoning": "The NASB renders theopneustos as 'inspired by God' while the ESV uses 'breathed out by God.' The foil 'approved' would reduce the doctrine of inspiration to a mere divine endorsement of human writing, which is a lower view of Scripture than Paul intends. A model ranking 'approved' above 'inspired' reveals it has not learned the distinction between divine origination (inspiration) and divine sanction (approval). This is a key test of bibliology precision.", "surface_confounder": "approved" }, { "id": "CON_178", "type": "contrastive_theology", "category": "eschatology", "difficulty": "medium", "input": "I have fought the good [MASK], I have finished the race, I have kept the faith.", "targets": [ "fight" ], "foils": [ "war" ], "acceptable_alternatives": [], "failure_examples": [ "battle", "cause", "game", "work", "test" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Timothy 4:7", "reasoning": "Paul uses 'fight' (agon) \u2014 an athletic contest metaphor \u2014 not 'war' (polemos), which would imply military conflict. The foil 'war' is theologically significant because it shifts the frame from personal endurance in faith to cosmic warfare, which is a different NT motif (Ephesians 6). A model ranking 'war' above 'fight' conflates Paul's athletic perseverance imagery with the spiritual warfare register. 
The triadic structure (fight, race, faith) is internally consistent as athletic metaphor.", "surface_confounder": "war" }, { "id": "CON_179", "type": "contrastive_theology", "category": "christology", "difficulty": "medium", "input": "Christ Jesus gave himself as a [MASK] for all, which is the testimony given at the proper time.", "targets": [ "ransom" ], "foils": [ "sacrifice" ], "acceptable_alternatives": [], "failure_examples": [ "gift", "price", "payment", "offering", "sign" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 Timothy 2:6", "reasoning": "Paul's term is 'ransom' (antilytron), not 'sacrifice' (thysia). The foil 'sacrifice' comes from the Levitical-Hebrews atonement register. While both describe Christ's atoning work, 'ransom' emphasizes the liberation-payment metaphor (redemption from bondage) while 'sacrifice' emphasizes the cultic-offering metaphor. A model that correctly predicts 'ransom' over 'sacrifice' has differentiated Pauline pastoral atonement vocabulary from the priestly atonement vocabulary of Hebrews.", "surface_confounder": "sacrifice" }, { "id": "CON_180", "type": "contrastive_theology", "category": "soteriology", "difficulty": "medium", "input": "Christ Jesus came into the world to save [MASK], of whom I am the foremost.", "targets": [ "sinners" ], "foils": [ "saints" ], "acceptable_alternatives": [], "failure_examples": [ "people", "souls", "nations", "israel", "men" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 Timothy 1:15", "reasoning": "The object of Christ's saving mission is 'sinners,' not 'saints.' The foil 'saints' is a strong confounder because NT collocations frequently pair Christ with saints (holy ones). A model ranking 'saints' above 'sinners' has failed to encode the gospel paradox at the heart of 1 Timothy \u2014 that Christ came specifically for those who are unrighteous. 
Paul's self-identification as 'foremost' among the group further constrains the answer to a negative self-referent.", "surface_confounder": "saints" }, { "id": "CON_181", "type": "contrastive_theology", "category": "hamartiology", "difficulty": "hard", "input": "Paul warned that in later times some will depart from the [MASK], devoting themselves to deceitful spirits and teachings of demons.", "targets": [ "faith" ], "foils": [ "church" ], "acceptable_alternatives": [ "truth" ], "failure_examples": [ "lord", "law", "world", "spirit", "way" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "1 Timothy 4:1", "reasoning": "Paul warns about departure from 'the faith' (objective body of doctrine), not 'the church' (institutional community). The foil 'church' tests whether the model confuses apostasy (abandoning doctrine) with schism (leaving the community). This is a significant ecclesiological-hamartiological distinction: one can leave the church without leaving the faith, and vice versa. A model ranking 'faith' above 'church' has correctly encoded Paul's concern in 1 Timothy 4 as doctrinal apostasy rather than institutional departure.", "surface_confounder": "church" }, { "id": "CON_182", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "He saved us and called us to a holy calling, not because of our works but because of his own purpose and [MASK].", "targets": [ "grace" ], "foils": [ "reward" ], "acceptable_alternatives": [ "mercy" ], "failure_examples": [ "power", "will", "glory", "law", "plan" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "2 Timothy 1:9", "reasoning": "Paul's pairing of 'purpose and grace' in 2 Timothy 1:9 is a classic statement of sovereign grace in salvation. The foil 'reward' inverts the soteriological logic entirely \u2014 reward implies merit-based recompense, which is precisely what Paul denies with 'not because of our works.' 
A model ranking 'reward' above 'grace' has failed to encode the antithesis between works-righteousness and grace that structures this verse. 'Mercy' is an acceptable alternative as it preserves the unmerited-favor category.", "surface_confounder": "reward" }, { "id": "DOC_215", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "He saved us, not because of works done by us in righteousness, but according to his own [MASK].", "targets": [ "mercy" ], "foils": [], "acceptable_alternatives": [ "grace" ], "failure_examples": [ "power", "wisdom", "anger", "justice", "will" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Titus 3:5", "reasoning": "Titus 3:5 grounds salvation in divine mercy contrasted with human works-righteousness. A model that predicts 'mercy' demonstrates it has learned the Pauline pattern of negating human merit and attributing salvation to God's compassionate initiative. 'Grace' is an acceptable near-synonym since Paul uses both interchangeably in this soteriological frame. Predictions like 'power' or 'wisdom' would reveal the model has not encoded the affective-relational dimension of God's saving act.", "surface_confounder": "will" }, { "id": "DOC_216", "type": "doctrinal_association", "category": "pneumatology", "difficulty": "medium", "input": "He saved us through the washing of regeneration and [MASK] of the Holy Spirit.", "targets": [ "renewal" ], "foils": [], "acceptable_alternatives": [ "renewing" ], "failure_examples": [ "power", "gift", "fire", "presence", "baptism" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Titus 3:5", "reasoning": "The 'washing of regeneration and renewal of the Holy Spirit' is one of the most concentrated pneumatological statements in the Pastorals. 'Renewal' specifically names the Spirit's ongoing transformative work as distinct from initial regeneration. 
A model predicting 'power' or 'fire' has learned Pentecostal-adjacent Spirit associations but missed the Pauline emphasis on internal moral transformation. 'Baptism' as a top prediction would conflate the sacramental metaphor with the Spirit's distinct role.", "surface_confounder": "baptism" }, { "id": "DOC_217", "type": "doctrinal_association", "category": "soteriology", "difficulty": "medium", "input": "Christ Jesus gave himself for us to [MASK] us from all lawlessness and to purify for himself a people for his own possession.", "targets": [ "redeem" ], "foils": [], "acceptable_alternatives": [ "free", "save", "deliver" ], "failure_examples": [ "judge", "condemn", "teach", "lead", "bless" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Titus 2:14", "reasoning": "Titus 2:14 employs marketplace-ransom soteriology: Christ's self-giving purchases liberation from lawlessness. 'Redeem' is the precise theological term encoding substitutionary cost. 'Save' and 'free' capture the liberation dimension but lose the price-paid connotation. A model predicting 'judge' or 'condemn' has inverted the soteriological polarity of the passage entirely.", "surface_confounder": "save" }, { "id": "DOC_218", "type": "doctrinal_association", "category": "ecclesiology", "difficulty": "easy", "input": "This is why I left you in Crete, so that you might put what remained into order, and appoint [MASK] in every town as I directed you.", "targets": [ "elders" ], "foils": [], "acceptable_alternatives": [ "leaders" ], "failure_examples": [ "prophets", "apostles", "kings", "judges", "priests" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Titus 1:5", "reasoning": "Titus 1:5 is a foundational ecclesiology text establishing the pastoral pattern of appointing local church officers. 'Elders' is the standard English rendering of presbyteroi and should be strongly activated by the Cretan church-planting context. 
A model predicting 'apostles' or 'prophets' has failed to distinguish foundational-era offices from ongoing local-church polity.", "surface_confounder": "priests" }, { "id": "DOC_219", "type": "doctrinal_association", "category": "soteriology", "difficulty": "hard", "input": "So that being [MASK] by his grace we might become heirs according to the hope of eternal life.", "targets": [ "justified" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "loved", "chosen", "blessed", "filled", "kept" ], "pass_condition": "target_in_top_k", "k": 3, "reference": "Titus 3:7", "reasoning": "This verse requires the model to select the forensic-declarative term 'justified' over more generic soteriological vocabulary. The co-occurrence of 'by his grace' and 'heirs' narrows the field to justification specifically \u2014 the legal declaration that precedes inheritance. 'Blessed' or 'loved' would indicate the model learned positive-affect religious language but not the Pauline ordo salutis. Top-3 is demanding because 'saved' will compete strongly.", "surface_confounder": "saved" }, { "id": "DOC_220", "type": "doctrinal_association", "category": "eschatology", "difficulty": "easy", "input": "Waiting for our blessed [MASK], the appearing of the glory of our great God and Savior Jesus Christ.", "targets": [ "hope" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "day", "king", "lord", "time", "gift" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Titus 2:13", "reasoning": "The 'blessed hope' is a signature eschatological phrase in evangelical vocabulary, heavily drawn from Titus 2:13. The appositive structure ('our blessed hope, the appearing') makes this relatively easy \u2014 the model needs only to have encountered this high-frequency collocation. 
'Day' as a top prediction would reveal generic eschatological encoding without the specific Titus 2:13 formula.", "surface_confounder": "day" }, { "id": "DOC_221", "type": "doctrinal_association", "category": "theology_proper", "difficulty": "medium", "input": "Paul, a servant of God, in hope of eternal life, which God, who never [MASK], promised before the ages began.", "targets": [ "lies" ], "foils": [], "acceptable_alternatives": [ "fails" ], "failure_examples": [ "changes", "sleeps", "dies", "forgets", "rests" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Titus 1:2", "reasoning": "Titus 1:2 grounds the reliability of the gospel promise in the attribute of divine veracity \u2014 God cannot lie. 'Lies' is the canonical ESV term (the NASB reads 'cannot lie') and tests whether the model has encoded truthfulness as a core divine attribute tied to promissory theology. 'Changes' would indicate theological drift toward immutability language, which is adjacent but misses the specific veracity claim. 'Fails' captures reliability but loses the moral-truthfulness dimension.", "surface_confounder": "changes" }, { "id": "CAN_135", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "easy", "input": "For the [MASK] of God has appeared, bringing salvation for all people.", "targets": [ "grace" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "wrath", "law", "power", "word", "kingdom" ], "pass_condition": "target_in_top_k", "k": 10, "reference": "Titus 2:11", "reasoning": "Titus 2:11 is one of the most quoted verses from the Pastorals and a cornerstone of grace theology. The personification of grace as 'appearing' is distinctive. 'Grace' should be trivially recalled in top-10 for any model with evangelical training exposure. 
'Wrath' or 'law' as top predictions would indicate catastrophic doctrinal inversion.", "surface_confounder": "" }, { "id": "CAN_136", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "To the [MASK], all things are pure, but to the defiled and unbelieving, nothing is pure.", "targets": [ "pure" ], "foils": [], "acceptable_alternatives": [ "clean" ], "failure_examples": [ "wise", "holy", "faithful", "strong", "righteous" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Titus 1:15", "reasoning": "Titus 1:15 uses a chiastic purity structure \u2014 'to the pure, all things are pure.' The repetition of the purity concept is the key canonical feature. A model must recall the exact lexical echo rather than substituting a generic positive attribute like 'righteous' or 'holy.' This tests verbatim canonical retention of a distinctive rhetorical pattern.", "surface_confounder": "righteous" }, { "id": "CAN_137", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "But when the goodness and loving kindness of God our [MASK] appeared, he saved us.", "targets": [ "savior" ], "foils": [], "acceptable_alternatives": [], "failure_examples": [ "lord", "king", "father", "creator", "master" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Titus 3:4", "reasoning": "The title 'God our Savior' is a distinctive Pastoral Epistles formula appearing six times in 1 Timothy and Titus. This tests whether the model has internalized this specific divine epithet rather than defaulting to the more frequent 'Lord' or 'Father.' 
Correct recall demonstrates exposure to the Pastorals' characteristic theology-proper vocabulary.", "surface_confounder": "lord" }, { "id": "CAN_138", "type": "canonical_knowledge", "category": "canonical_knowledge", "difficulty": "medium", "input": "They profess to know God, but they [MASK] him by their works.", "targets": [ "deny" ], "foils": [], "acceptable_alternatives": [ "reject" ], "failure_examples": [ "love", "serve", "honor", "worship", "praise" ], "pass_condition": "target_in_top_k", "k": 5, "reference": "Titus 1:16", "reasoning": "Titus 1:16 is a sharp indictment of nominal faith contradicted by behavior. The structure 'profess... but deny' requires the model to complete a contrastive pair. 'Deny' is the canonical ESV/NASB verb. A model predicting 'love' or 'serve' has failed to detect the adversative 'but' that flips the valence from profession to contradiction.", "surface_confounder": "honor" }, { "id": "CON_183", "type": "contrastive_theology", "category": "christology", "difficulty": "hard", "input": "Waiting for the appearing of the glory of our great God and [MASK] Jesus Christ.", "targets": [ "savior" ], "foils": [ "prophet" ], "acceptable_alternatives": [], "failure_examples": [ "teacher", "brother", "servant", "friend" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Titus 2:13", "reasoning": "Titus 2:13 is a key Granville Sharp text: 'our great God and Savior Jesus Christ' applies both 'God' and 'Savior' to the same person, Jesus. The foil 'prophet' represents a reductionist Christology that denies deity \u2014 a model confused about the ontological status of Christ in the Pastorals would rank 'prophet' near 'savior.' 
Correct ranking demonstrates the model has encoded the high Christology of this verse, not merely a respectful-but-subordinate title.", "surface_confounder": "prophet" }, { "id": "CON_184", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "He saved us, not because of [MASK] done by us in righteousness, but according to his own mercy.", "targets": [ "works" ], "foils": [ "faith" ], "acceptable_alternatives": [ "deeds" ], "failure_examples": [ "prayers", "gifts", "sins", "words", "laws" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Titus 3:5", "reasoning": "The 'not because of works... but according to mercy' structure is the central sola gratia claim of Titus. The foil 'faith' is theologically subtle: while salvation is through faith, the verse specifically negates human works as the cause. A model that ranks 'faith' above 'works' here has confused the instrument of salvation with the negated ground of salvation \u2014 a common semi-Pelagian conflation. Correct ranking shows the model has parsed the negation structure accurately.", "surface_confounder": "faith" }, { "id": "CON_185", "type": "contrastive_theology", "category": "soteriology", "difficulty": "hard", "input": "So that being justified by his [MASK] we might become heirs according to the hope of eternal life.", "targets": [ "grace" ], "foils": [ "works" ], "acceptable_alternatives": [], "failure_examples": [ "law", "blood", "name", "power", "word" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Titus 3:7", "reasoning": "Titus 3:7 states the instrumental cause of justification as grace, following the Pauline pattern (cf. Romans 3:24). The foil 'works' represents the legalistic error Paul explicitly negates two verses earlier. A model ranking 'works' above 'grace' in this context has catastrophically mislearned the Pauline sola gratia framework. 
Even 'blood' \u2014 while theologically valid in other contexts \u2014 misses the specific grace-instrument claim of this verse.", "surface_confounder": "works" }, { "id": "CON_186", "type": "contrastive_theology", "category": "hamartiology", "difficulty": "medium", "input": "To the pure all things are pure, but to the [MASK] and unbelieving, nothing is pure.", "targets": [ "defiled" ], "foils": [ "ignorant" ], "acceptable_alternatives": [ "corrupt", "wicked" ], "failure_examples": [ "weak", "poor", "simple", "young", "lost" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Titus 1:15", "reasoning": "Titus 1:15 diagnoses the problem of the impure as moral defilement, not epistemic deficiency. The foil 'ignorant' represents a rationalist anthropology where sin is reducible to lack of knowledge \u2014 a Socratic rather than Pauline hamartiology. A model ranking 'ignorant' above 'defiled' has encoded a shallow view of human corruption. Correct ranking demonstrates the model has learned that Paul frames impurity as a volitional-moral category paired with unbelief, not an intellectual one.", "surface_confounder": "ignorant" }, { "id": "CON_187", "type": "contrastive_theology", "category": "ecclesiology", "difficulty": "medium", "input": "For an [MASK], as God's steward, must be above reproach.", "targets": [ "overseer" ], "foils": [ "apostle" ], "acceptable_alternatives": [ "elder", "bishop" ], "failure_examples": [ "prophet", "deacon", "priest", "king", "angel" ], "pass_condition": "correct_beats_foil", "k": 5, "reference": "Titus 1:7", "reasoning": "Titus 1:5\u20137 uses 'elder' and 'overseer' interchangeably, establishing the Protestant identification of these offices. The foil 'apostle' tests whether the model confuses ongoing local-church offices with the foundational apostolic role. A model that ranks 'apostle' above 'overseer' in this stewardship-qualification context has conflated church polity categories. 
'Bishop' is acceptable as a translation variant of episkopos.", "surface_confounder": "apostle" } ] }