codeflow-ai / rag /sample_templates /13_union_dedupe.sql
unknown
Initial commit: CodeFlow AI - NL to SQL Generator
7814c1f
-- Template: UNION vs UNION ALL for Deduplication
-- Pattern: Combine results from multiple queries
-- Use Case: Merge data from different sources, combine segments
-- UNION ALL: Keeps all rows including duplicates (FASTER)
SELECT customer_id, name, 'Active' as status
FROM customers
WHERE last_order_date >= CURRENT_DATE - INTERVAL '90 days'
UNION ALL
SELECT customer_id, name, 'VIP' as status
FROM customers
WHERE total_lifetime_value > 10000;
-- UNION: Removes duplicates (SLOWER but cleaner)
SELECT customer_id, name, email
FROM customers
WHERE city = 'New York'
UNION
SELECT customer_id, name, email
FROM customers
WHERE total_lifetime_value > 5000;
-- Example: Combine current and archived orders
SELECT
order_id,
customer_id,
order_date,
total_amount,
'current' as source
FROM orders
WHERE order_date >= '2024-01-01'
UNION ALL
SELECT
order_id,
customer_id,
order_date,
total_amount,
'archived' as source
FROM archived_orders
WHERE order_date < '2024-01-01';
-- Key Concepts:
-- - UNION removes duplicates (requires sort/compare - slower)
-- - UNION ALL keeps all rows (faster, no deduplication)
-- - All SELECT statements must have same number and type of columns
-- - Use UNION ALL when you know there are no duplicates
-- - Add source identifier column to track origin