# TurboSkillSlug / sample_cases.py
# legendarydragontamer's picture
# deploy
# 51a9974
# Raw
# History Blame Contribute Delete
# 3.26 kB
"""Sample eval cases: each session paired with a DISTINCT held-out task in the
same class. These are illustrative; swap in your real sessions when running."""
from skill_uplift_eval import EvalCase
# --- Case 1 source session: tree dynamic programming -----------------------
# Structured extraction of a debugging session. The "position" values are
# presumably fractions of the way through the session (0.0 = start,
# 1.0 = end) -- TODO confirm against skill_uplift_eval.
_TREE_DP_SESSION = {
    "themes": ["tree dynamic programming", "processing order"],
    "approaches_tried": [
        {
            "approach": "top-down recursion",
            "why_it_failed": "recomputed each subtree per ancestor, O(n^2)",
        },
        {
            "approach": "process leaves first",
            "why_it_failed": "a parent's value needs children finalized first",
        },
    ],
    "dead_ends": [
        {"position": 0.3, "what_happened": "stack overflow on deep trees"},
    ],
    "breakthroughs": [
        {
            "position": 0.85,
            "what_worked": "compute bottom-up, deepest nodes first, memoizing subtree results",
        },
    ],
    "gotchas": [
        "Top-down recursion recomputes subtrees for every ancestor, making it O(n^2); compute bottom-up once instead.",
        "Processing leaves first feels natural but a parent depends on its children being finalized; process deepest-first.",
    ],
    "sentiment_arc": {"start": "frustrated", "end": "resolved"},
}

# --- Case 2 source session: absorbing Markov chains ------------------------
# Same dictionary layout as the tree-DP session above.
_MARKOV_SESSION = {
    "themes": ["markov chains", "absorbing states"],
    "approaches_tried": [
        {
            "approach": "simulate many runs",
            "why_it_failed": "variance too high to converge on the exact value",
        },
        {
            "approach": "solve the full linear system",
            "why_it_failed": "singular at the absorbing state's row",
        },
    ],
    "dead_ends": [
        {"position": 0.4, "what_happened": "matrix inversion failed, singular"},
    ],
    "breakthroughs": [
        {
            "position": 0.85,
            "what_worked": "drop the absorbing row, solve only the transient states' first-step equations",
        },
    ],
    "gotchas": [
        "The absorbing-state row makes the system singular; exclude it and solve only transient states.",
        "Expected hitting time is not absorption probability; do not conflate them.",
    ],
    "sentiment_arc": {"start": "frustrated", "end": "resolved"},
}

# Each case pairs one session extraction with a held-out task that is in the
# same problem class but is NOT the problem the session itself worked on.
# How answer_key_terms and leak_terms are scored is defined by the evaluator
# in skill_uplift_eval, not here.
CASES = [
    EvalCase(
        name="tree_dp_to_new_tree_problem",
        session_extraction=_TREE_DP_SESSION,
        # Held-out task: a different tree-DP problem from the one in the session.
        task_prompt=(
            "Given a tree where each node has a value, compute for every node the "
            "maximum sum of any path from that node down to a leaf, efficiently. "
            "Describe the algorithm and its time complexity. "
            "Be concise."
        ),
        answer_key_terms=["bottom-up", "post-order", "children", "O(n)"],
        # A phrase lifted verbatim from this task's prompt: a skill that
        # contains it has leaked the held-out task rather than generalized.
        leak_terms=["maximum sum of any path from that node down to a leaf"],
    ),
    EvalCase(
        name="markov_to_new_absorbing_chain",
        session_extraction=_MARKOV_SESSION,
        # Held-out task: a concrete absorbing random walk on five states.
        task_prompt=(
            "In a random walk on states 0..4 where 0 and 4 are absorbing, "
            "find the probability of being absorbed at 4 starting from state 2. "
            "Describe the method, not just the number. "
            "Be concise."
        ),
        answer_key_terms=["transient", "first-step", "linear", "exclude"],
        # Verbatim phrase from this task's prompt, used as the leakage marker.
        leak_terms=["absorbed at 4 starting from state 2"],
    ),
]