Spaces:
Sleeping
Sleeping
File size: 3,251 Bytes
6a849b2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 | -- Task 3: Query Optimization & Indexing
-- A financial reporting system runs a critical aggregation query across
-- a large sales_transactions table. It is doing a FULL TABLE SCAN,
-- making it extremely slow at scale.
--
-- The agent is given the slow query and must:
-- 1) Run EXPLAIN QUERY PLAN to diagnose why it's slow
-- 2) CREATE the correct index(es) on the right columns
-- 3) Optionally: rewrite the query to be more efficient
-- 4) Re-run EXPLAIN to verify a SCAN has become a SEARCH (Index Scan)
--
-- The grader checks: does EXPLAIN on the final query show "SEARCH" not "SCAN"?
CREATE TABLE IF NOT EXISTS sales_transactions (
id INTEGER PRIMARY KEY,
order_id TEXT NOT NULL,
customer_id INTEGER NOT NULL,
product_id INTEGER NOT NULL,
region TEXT NOT NULL,
sale_date TEXT NOT NULL,
quantity INTEGER NOT NULL,
unit_price REAL NOT NULL,
discount REAL NOT NULL DEFAULT 0.0,
total_amount REAL NOT NULL,
status TEXT NOT NULL -- 'completed','pending','refunded'
);
CREATE TABLE IF NOT EXISTS products (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
category TEXT NOT NULL,
supplier_id INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS regions (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
zone TEXT NOT NULL
);
INSERT INTO products VALUES
(1, 'Laptop Pro', 'Electronics', 101),
(2, 'USB Hub', 'Accessories', 102),
(3, 'Desk Chair', 'Furniture', 103),
(4, 'Monitor 27"', 'Electronics', 101),
(5, 'Keyboard MX', 'Accessories', 102),
(6, 'Standing Desk', 'Furniture', 103),
(7, 'Webcam HD', 'Electronics', 104),
(8, 'Headphones', 'Electronics', 104),
(9, 'Mouse Pad', 'Accessories', 102),
(10,'Filing Cabinet','Furniture', 103);
INSERT INTO regions VALUES
(1, 'North America', 'West'),
(2, 'Europe', 'East'),
(3, 'Asia Pacific', 'East'),
(4, 'Latin America', 'West');
-- Generate 2000 synthetic transactions for a realistic slow scan scenario
WITH RECURSIVE cnt(x) AS (SELECT 1 UNION ALL SELECT x+1 FROM cnt WHERE x<2000)
INSERT INTO sales_transactions
SELECT
x,
'ORD-' || printf('%06d', x),
(x % 500) + 1,
(x % 10) + 1,
CASE (x % 4) WHEN 0 THEN 'North America' WHEN 1 THEN 'Europe' WHEN 2 THEN 'Asia Pacific' ELSE 'Latin America' END,
date('2023-01-01', '+' || (x % 365) || ' days'),
(x % 10) + 1,
round(50.0 + (x % 200), 2),
round((x % 20) * 0.01, 2),
round((50.0 + (x % 200)) * ((1+(x%10)) - (x%20)*0.01), 2),
CASE (x % 3) WHEN 0 THEN 'completed' WHEN 1 THEN 'pending' ELSE 'refunded' END
FROM cnt;
-- THE SLOW QUERY (given to the agent):
-- SELECT
-- p.category,
-- st.region,
-- SUM(st.total_amount) AS total_revenue,
-- COUNT(st.id) AS order_count
-- FROM sales_transactions st
-- JOIN products p ON st.product_id = p.id
-- WHERE st.sale_date BETWEEN '2023-06-01' AND '2023-09-30'
-- AND st.status = 'completed'
-- GROUP BY p.category, st.region
-- ORDER BY total_revenue DESC;
--
-- SOLUTION: CREATE INDEX idx_sales_date_status ON sales_transactions(sale_date, status);
-- After indexing, EXPLAIN QUERY PLAN shows "SEARCH" instead of "SCAN"
|