# abpt/src/data/qwen_rerank_cases.py
# Commit 8125804 - feat: add src/ module for script imports
from __future__ import annotations
from dataclasses import dataclass
@dataclass(frozen=True)
class QwenRerankCase:
    """A single prompt with one preferred and one rejected continuation.

    The dataclass is frozen: attributes cannot be reassigned after
    construction, and instances compare (and hash) by field values.
    Field order is part of the positional constructor signature.
    """

    # Case identifier; the built-in cases use "<family>_<expected_mode>".
    name: str
    # Group label shared by related cases (e.g. "quantifier", "units").
    family: str
    # Context text that the two candidate continuations follow.
    prompt: str
    # Continuation that should be favoured for this prompt.
    preferred: str
    # Continuation that should be disfavoured for this prompt.
    rejected: str
    # Scenario label; the built-in cases use "stable" or "conflict".
    expected_mode: str
def make_qwen_rerank_cases() -> list[QwenRerankCase]:
    """Return the built-in rerank cases as a freshly built list.

    The list holds sixteen cases: eight families, each contributing a
    ``"stable"`` case followed by a ``"conflict"`` case. In a stable case the
    prompt asks to keep an established frame; in a conflict case the prompt
    describes the text drifting away from that frame, and the preferred
    continuation restores it while the rejected one follows the drift.

    Every ``preferred`` / ``rejected`` string begins with a single space
    (presumably so it can be appended directly to ``prompt`` -- confirm
    against the caller).

    Returns:
        A new list on every call, in a fixed order.
    """
    return [
        # --- quantifier: universal vs. existential reading ---
        QwenRerankCase(
            name="quantifier_stable",
            family="quantifier",
            expected_mode="stable",
            prompt=(
                "Claim: for all natural numbers n greater than zero, the statement is true. "
                "Continue in a way that preserves a universal mathematical reading."
            ),
            preferred=" Therefore the continuation keeps the universal claim and does not switch to an existential witness.",
            rejected=" Therefore it is enough to give one witness, so the universal claim can be dropped.",
        ),
        QwenRerankCase(
            name="quantifier_conflict",
            family="quantifier",
            expected_mode="conflict",
            prompt=(
                "Claim: for all natural numbers n greater than zero, the statement is true. "
                "Then the text shifts toward saying there exists a witness instead. Continue the reasoning."
            ),
            preferred=" The correction is to reject that existential drift and restore the original universal statement.",
            rejected=" So the proof now only needs one witness and the universal statement no longer matters.",
        ),
        # --- proof_mode: proof by contradiction vs. direct proof ---
        QwenRerankCase(
            name="proof_mode_stable",
            family="proof_mode",
            expected_mode="stable",
            prompt=(
                "We assume the negation, derive a contradiction, and conclude the original claim holds. "
                "Continue in a way that preserves proof-by-contradiction style."
            ),
            preferred=" We therefore keep the contradiction structure and discharge the assumed negation.",
            rejected=" We now abandon contradiction mode and switch to a direct constructive proof from scratch.",
        ),
        QwenRerankCase(
            name="proof_mode_conflict",
            family="proof_mode",
            expected_mode="conflict",
            prompt=(
                "We assume the negation and derive a contradiction. "
                "Halfway through, the text starts acting like a direct proof instead. Continue carefully."
            ),
            preferred=" The continuation should return to contradiction mode and conclude that the assumption was false.",
            rejected=" The continuation should drop contradiction and proceed as an ordinary direct proof.",
        ),
        # --- induction: induction step vs. arguing from an example ---
        QwenRerankCase(
            name="induction_stable",
            family="induction",
            expected_mode="stable",
            prompt=(
                "We prove the statement by induction: first the base case, then the induction step, "
                "then the successor case. Continue the proof in induction mode."
            ),
            preferred=" The next move is to apply the induction hypothesis in the step from n to n plus one.",
            rejected=" A single illustrative example is enough, so the induction step is unnecessary.",
        ),
        QwenRerankCase(
            name="induction_conflict",
            family="induction",
            expected_mode="conflict",
            prompt=(
                "We prove the statement by induction with a base case and an induction hypothesis. "
                "Later the text starts arguing from an arbitrary example instead of the induction step. Continue the proof."
            ),
            preferred=" Repair the argument by reinstating the induction hypothesis and proving the successor case.",
            rejected=" Continue from the arbitrary example and ignore the induction hypothesis entirely.",
        ),
        # --- api_framework: async FastAPI vs. synchronous Django ---
        QwenRerankCase(
            name="api_framework_stable",
            family="api_framework",
            expected_mode="stable",
            prompt=(
                "We are documenting a FastAPI service with async request handlers, dependency injection, "
                "and Pydantic models. Continue the technical explanation in that same framework."
            ),
            preferred=" Keep the explanation on async FastAPI handlers, dependency injection, and Pydantic validation.",
            rejected=" Reframe the service as a synchronous Django class-based view with template rendering.",
        ),
        QwenRerankCase(
            name="api_framework_conflict",
            family="api_framework",
            expected_mode="conflict",
            prompt=(
                "We are documenting a FastAPI service with async request handlers. "
                "Later the text starts describing it as a synchronous Django view layer. Continue the technical explanation."
            ),
            preferred=" Correct the drift and return to async FastAPI handlers with typed request models.",
            rejected=" Continue describing a synchronous Django view and middleware stack.",
        ),
        # --- instruction_constraints: format/tone constraints vs. free-form ---
        QwenRerankCase(
            name="instruction_constraints_stable",
            family="instruction_constraints",
            expected_mode="stable",
            prompt=(
                "Task: answer in exactly three bullet points, keep the tone formal, and avoid speculation. "
                "Continue by planning the answer while preserving those constraints."
            ),
            preferred=" Use exactly three formal bullet points and avoid speculative language.",
            rejected=" Write a casual paragraph with creative guesses and no bullet structure.",
        ),
        QwenRerankCase(
            name="instruction_constraints_conflict",
            family="instruction_constraints",
            expected_mode="conflict",
            prompt=(
                "Task: answer in exactly three bullet points, keep the tone formal, and avoid speculation. "
                "Later the text starts encouraging a casual long-form narrative with creative guesses. Continue the planning."
            ),
            preferred=" Return to three formal bullet points and avoid unsupported guesses.",
            rejected=" Lean into a casual long narrative with speculative commentary.",
        ),
        # --- entity_property: documented allergy vs. contradicting advice ---
        QwenRerankCase(
            name="entity_property_stable",
            family="entity_property",
            expected_mode="stable",
            prompt=(
                "The report states that the patient is allergic to penicillin and therefore receives a non-penicillin antibiotic. "
                "Continue the clinical note while preserving that allergy constraint."
            ),
            preferred=" Continue with a safe non-penicillin alternative and keep the allergy warning explicit.",
            rejected=" Recommend amoxicillin as the routine next step despite the documented allergy.",
        ),
        QwenRerankCase(
            name="entity_property_conflict",
            family="entity_property",
            expected_mode="conflict",
            prompt=(
                "The report states that the patient is allergic to penicillin. "
                "Later the text starts recommending amoxicillin as the routine first-line choice. Continue the clinical note."
            ),
            preferred=" Flag the contradiction and avoid amoxicillin because of the penicillin allergy.",
            rejected=" Proceed with amoxicillin as the routine recommendation.",
        ),
        # --- legal_scope: territorial/non-commercial limits vs. expansion ---
        QwenRerankCase(
            name="legal_scope_stable",
            family="legal_scope",
            expected_mode="stable",
            prompt=(
                "Draft a contract clause that applies only within the territory of Kazakhstan and only for non-commercial use. "
                "Continue the clause while preserving those scope limits."
            ),
            preferred=" The clause remains limited to Kazakhstan and to non-commercial use only.",
            rejected=" The clause now grants worldwide commercial sublicensing rights.",
        ),
        QwenRerankCase(
            name="legal_scope_conflict",
            family="legal_scope",
            expected_mode="conflict",
            prompt=(
                "Draft a contract clause that applies only within the territory of Kazakhstan and only for non-commercial use. "
                "Later the draft begins to claim worldwide rights for unrestricted commercial sublicensing. Continue the clause."
            ),
            preferred=" Narrow the language back to Kazakhstan-only non-commercial use.",
            rejected=" Expand the rights to worldwide commercial sublicensing.",
        ),
        # --- units: centimeters vs. unconverted meters ---
        QwenRerankCase(
            name="units_stable",
            family="units",
            expected_mode="stable",
            prompt=(
                "We measure the rod length in centimeters and report the final result as 125 cm. "
                "Continue the calculation notes while preserving the same unit system."
            ),
            preferred=" Keep the result in centimeters and avoid any unit change without conversion.",
            rejected=" Treat 125 as meters without converting from centimeters.",
        ),
        QwenRerankCase(
            name="units_conflict",
            family="units",
            expected_mode="conflict",
            prompt=(
                "We measure the rod length in centimeters and report the final result as 125 cm. "
                "Later the text starts treating 125 as if it were already measured in meters without any conversion. Continue the calculation notes."
            ),
            preferred=" Correct the calculation by converting units before making any statement in meters.",
            rejected=" Continue as if 125 cm already means 125 meters.",
        ),
    ]