codeflow-ai / rag /sample_templates /15_subquery_optimization.sql
unknown
Initial commit: CodeFlow AI - NL to SQL Generator
7814c1f
-- Template: Subquery Optimization - NOT IN vs NOT EXISTS
-- Pattern: Efficient ways to exclude records based on subquery
-- Use Case: Find records that don't match conditions, anti-joins
-- ❌ SLOWER: NOT IN with subquery (issues with NULL values)
SELECT customer_id, name
FROM customers
WHERE customer_id NOT IN (
SELECT customer_id
FROM orders
WHERE order_date >= '2024-01-01'
);
-- Problem: If orders.customer_id has ANY NULL, entire query returns empty!
-- βœ… BETTER: NOT EXISTS (faster and NULL-safe)
SELECT c.customer_id, c.name
FROM customers c
WHERE NOT EXISTS (
SELECT 1
FROM orders o
WHERE o.customer_id = c.customer_id
AND o.order_date >= '2024-01-01'
);
-- βœ… ALTERNATIVE: LEFT JOIN with NULL check
SELECT c.customer_id, c.name
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
AND o.order_date >= '2024-01-01'
WHERE o.order_id IS NULL;
-- Example: Products NOT ordered in specific date range
SELECT p.product_id, p.name, p.category
FROM products p
WHERE NOT EXISTS (
SELECT 1
FROM order_items oi
JOIN orders o ON oi.order_id = o.order_id
WHERE oi.product_id = p.product_id
AND o.order_date >= '2024-01-01'
AND o.order_date < '2024-04-01'
);
-- Key Concepts:
-- - NOT IN fails if subquery returns NULL
-- - NOT EXISTS is NULL-safe and often faster
-- - LEFT JOIN with IS NULL also works well
-- - NOT EXISTS can short-circuit (stops at first match)
-- - Use SELECT 1 in EXISTS for efficiency