"""Sample eval cases for the skill-uplift eval.

Each case pairs a session extraction with a DISTINCT held-out task in the same
class. These are illustrative; swap in your real sessions when running.
"""
from skill_uplift_eval import EvalCase


# Every case passes the same five keyword arguments to EvalCase: ``name``,
# ``session_extraction``, ``task_prompt``, ``answer_key_terms`` and
# ``leak_terms``.
#
# NOTE(review): the meaning of these fields is defined by EvalCase in
# skill_uplift_eval, which is not visible from this file. Presumably
# ``answer_key_terms`` are terms a good answer should mention and
# ``leak_terms`` are task-specific phrases that must not show up in whatever is
# distilled from the session -- confirm against EvalCase before relying on it.
# What this file does show: each ``leak_terms`` entry is a verbatim phrase taken
# from that case's ``task_prompt``.
#
# NOTE(review): ``position`` values look like a fraction of the way through the
# session (0.0-1.0) -- confirm.
CASES = [
    # Session about tree dynamic programming / processing order; the held-out
    # task asks for a per-node best downward path sum on a tree.
    EvalCase(
        name="tree_dp_to_new_tree_problem",
        session_extraction={
            "themes": ["tree dynamic programming", "processing order"],
            "approaches_tried": [
                {
                    "approach": "top-down recursion",
                    "why_it_failed": "recomputed each subtree per ancestor, O(n^2)",
                },
                {
                    "approach": "process leaves first",
                    "why_it_failed": "a parent's value needs children finalized first",
                },
            ],
            "dead_ends": [
                {"position": 0.3, "what_happened": "stack overflow on deep trees"},
            ],
            "breakthroughs": [
                {
                    "position": 0.85,
                    "what_worked": "compute bottom-up, deepest nodes first, memoizing subtree results",
                },
            ],
            "gotchas": [
                "Top-down recursion recomputes subtrees for every ancestor, making it O(n^2); compute bottom-up once instead.",
                "Processing leaves first feels natural but a parent depends on its children being finalized; process deepest-first.",
            ],
            "sentiment_arc": {"start": "frustrated", "end": "resolved"},
        },
        # Adjacent string literals are concatenated into a single prompt; the
        # trailing space inside each piece is what separates the sentences.
        task_prompt=(
            "Given a tree where each node has a value, compute for every node "
            "the maximum sum of any path from that node down to a leaf, efficiently. "
            "Describe the algorithm and its time complexity. Be concise."
        ),
        answer_key_terms=["bottom-up", "post-order", "children", "O(n)"],
        leak_terms=["maximum sum of any path from that node down to a leaf"],
    ),
    # Session about Markov chains with absorbing states; the held-out task asks
    # for an absorption probability in a small random walk.
    EvalCase(
        name="markov_to_new_absorbing_chain",
        session_extraction={
            "themes": ["markov chains", "absorbing states"],
            "approaches_tried": [
                {
                    "approach": "simulate many runs",
                    "why_it_failed": "variance too high to converge on the exact value",
                },
                {
                    "approach": "solve the full linear system",
                    "why_it_failed": "singular at the absorbing state's row",
                },
            ],
            "dead_ends": [
                {"position": 0.4, "what_happened": "matrix inversion failed, singular"},
            ],
            "breakthroughs": [
                {
                    "position": 0.85,
                    "what_worked": "drop the absorbing row, solve only the transient states' first-step equations",
                },
            ],
            "gotchas": [
                "The absorbing-state row makes the system singular; exclude it and solve only transient states.",
                "Expected hitting time is not absorption probability; do not conflate them.",
            ],
            "sentiment_arc": {"start": "frustrated", "end": "resolved"},
        },
        # Same implicit-concatenation layout as the first case's prompt.
        task_prompt=(
            "In a random walk on states 0..4 where 0 and 4 are absorbing, find the "
            "probability of being absorbed at 4 starting from state 2. Describe the "
            "method, not just the number. Be concise."
        ),
        answer_key_terms=["transient", "first-step", "linear", "exclude"],
        leak_terms=["absorbed at 4 starting from state 2"],
    ),
]