Spaces:
Sleeping
Sleeping
| """Hand-authored pre-seed library β 8 playbook entries + 4 drift cards. | |
| Tag sets here match the tags on each corresponding scenario | |
| so a Jaccard top-k against the scenario's tag set returns | |
| a relevant entry for every one of the 10 scenarios β with two | |
| generic fallbacks for coverage on novel anti-patterns. | |
| """ | |
| from __future__ import annotations | |
| from skill_library.entries import DriftAdaptationCard, PlaybookEntry | |
| PRESEED_PLAYBOOK: tuple[PlaybookEntry, ...] = ( | |
| # 1. Correlated subquery β LEFT JOIN + GROUP BY | |
| PlaybookEntry( | |
| tag_set=frozenset({"correlated_subquery", "projection_subquery"}), | |
| before_snippet=( | |
| "SELECT u.*, (SELECT COUNT(*) FROM orders o WHERE o.user_id=u.id) FROM users u" | |
| ), | |
| after_snippet=( | |
| "SELECT u.*, COALESCE(c.n, 0) FROM users u " | |
| "LEFT JOIN (SELECT user_id, COUNT(*) n FROM orders GROUP BY user_id) c " | |
| "ON c.user_id = u.id" | |
| ), | |
| avg_speedup=6.0, | |
| scenario_family="ecommerce", | |
| ), | |
| # 2. SELECT * + join β project only needed columns | |
| PlaybookEntry( | |
| tag_set=frozenset({"select_star", "over_projection", "join"}), | |
| before_snippet="SELECT * FROM a JOIN b ON a.id=b.a_id", | |
| after_snippet="SELECT a.id, a.name, b.amount FROM a JOIN b ON a.id=b.a_id", | |
| avg_speedup=2.5, | |
| scenario_family="ecommerce", | |
| ), | |
| # 3. Cartesian join β add explicit ON clause | |
| PlaybookEntry( | |
| tag_set=frozenset({"cartesian", "missing_join_condition"}), | |
| before_snippet="SELECT * FROM a, b WHERE a.region = 'US'", | |
| after_snippet="SELECT a.col FROM a JOIN b ON a.id = b.a_id WHERE a.region = 'US'", | |
| avg_speedup=50.0, | |
| scenario_family="events", | |
| ), | |
| # 4. DISTINCT on GROUP BY β drop one | |
| PlaybookEntry( | |
| tag_set=frozenset({"distinct", "redundant_distinct", "group_by"}), | |
| before_snippet="SELECT DISTINCT tenant_id, count(*) FROM logs GROUP BY tenant_id", | |
| after_snippet="SELECT tenant_id, count(*) FROM logs GROUP BY tenant_id", | |
| avg_speedup=1.4, | |
| scenario_family="saas_logs", | |
| ), | |
| # 5. Nested IN-subquery β JOIN | |
| PlaybookEntry( | |
| tag_set=frozenset({"nested_subquery", "in_subquery"}), | |
| before_snippet="WHERE id IN (SELECT x_id FROM x WHERE ... )", | |
| after_snippet="JOIN x ON x.x_id = table.id WHERE ...", | |
| avg_speedup=3.0, | |
| scenario_family="cms", | |
| ), | |
| # 6. HAVING filter on groupable column β push to WHERE | |
| PlaybookEntry( | |
| tag_set=frozenset({"having_as_where", "aggregate_filter"}), | |
| before_snippet="GROUP BY x, status HAVING status = 'fulfilled'", | |
| after_snippet="WHERE status = 'fulfilled' GROUP BY x", | |
| avg_speedup=2.0, | |
| scenario_family="ecommerce", | |
| ), | |
| # 7. Generic: prefer JOINs over correlated subqueries | |
| PlaybookEntry( | |
| tag_set=frozenset({"subquery", "generic"}), | |
| before_snippet="scalar subquery in SELECT list", | |
| after_snippet="LEFT JOIN with aggregated CTE", | |
| avg_speedup=4.0, | |
| scenario_family="ecommerce", | |
| ), | |
| # 8. Generic: project only used columns | |
| PlaybookEntry( | |
| tag_set=frozenset({"over_projection", "generic"}), | |
| before_snippet="SELECT *", | |
| after_snippet="SELECT <only needed columns>", | |
| avg_speedup=1.8, | |
| scenario_family="ecommerce", | |
| ), | |
| ) | |
| PRESEED_DRIFT_CARDS: tuple[DriftAdaptationCard, ...] = ( | |
| DriftAdaptationCard( | |
| drift_kind="column_rename", | |
| symptom_regex=r'column ".+" does not exist', | |
| recovery_template=( | |
| "Read the changelog, update every identifier referencing the old column, and resubmit." | |
| ), | |
| success_rate=0.9, | |
| ), | |
| DriftAdaptationCard( | |
| drift_kind="date_format", | |
| symptom_regex=r"Could not convert string .+ to TIMESTAMP|BIGINT", | |
| recovery_template=( | |
| "Epoch-ms columns are BIGINT; cast your filter bounds with " | |
| "`EXTRACT(EPOCH FROM TIMESTAMP '...') * 1000` or use numeric literals." | |
| ), | |
| success_rate=0.85, | |
| ), | |
| DriftAdaptationCard( | |
| drift_kind="enum_rule", | |
| symptom_regex=r"(empty|zero) result set on filter `... = 'active'`", | |
| recovery_template=( | |
| "A single enum value may have been split into several; use `IN " | |
| "('ACTIVE', 'ACTIVE_V2')` instead of equality." | |
| ), | |
| success_rate=0.8, | |
| ), | |
| DriftAdaptationCard( | |
| drift_kind="field_deprecation", | |
| symptom_regex=r'column ".+" does not exist|non-existent column', | |
| recovery_template=( | |
| "The inline field was replaced by a FK; JOIN the lookup table and " | |
| "project the human-readable name from there." | |
| ), | |
| success_rate=0.75, | |
| ), | |
| ) | |
| __all__ = ["PRESEED_DRIFT_CARDS", "PRESEED_PLAYBOOK"] | |