Spaces:
Paused
Paused
File size: 15,881 Bytes
5a81b95 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 | -- Enable detailed query analysis
SET track_io_timing = ON;
SET enable_timing = ON;
\echo '=========================================================================='
\echo '=== Coalition Alignment Matrix - Detailed Subquery Analysis ==='
\echo '=========================================================================='
\echo ''
\echo 'This analysis breaks down the coalition view into individual CTEs'
\echo 'to identify performance bottlenecks and optimization opportunities.'
\echo ''
-- ===========================================================================
-- SECTION 1: Analyze CTE 1 - party_votes (Base Data Filter)
-- ===========================================================================
\echo ''
\echo '=========================================================================='
\echo '=== CTE 1: party_votes (5-year vote filter) ==='
\echo '=========================================================================='
\echo ''
EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, SUMMARY true, FORMAT text)
SELECT vote_data.party,
vote_data.embedded_id_ballot_id AS ballot_id,
vote_data.vote,
vote_data.vote_date
FROM public.vote_data
WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval)
AND vote_data.party IS NOT NULL
AND vote_data.vote IS NOT NULL;
\echo ''
\echo '--- CTE 1 Analysis Summary ---'
\echo 'Purpose: Filter vote_data for last 5 years with non-null party/vote'
\echo 'Expected: Should use index on vote_date + party + vote'
\echo 'Watch for: Sequential scans, missing indexes'
\echo ''
-- ===========================================================================
-- SECTION 2: Analyze CTE 2 - party_pairs (Cartesian Product)
-- ===========================================================================
\echo ''
\echo '=========================================================================='
\echo '=== CTE 2: party_pairs (All party combinations) ==='
\echo '=========================================================================='
\echo ''
EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, SUMMARY true, FORMAT text)
WITH party_votes AS (
SELECT vote_data.party,
vote_data.embedded_id_ballot_id AS ballot_id,
vote_data.vote,
vote_data.vote_date
FROM public.vote_data
WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval)
AND vote_data.party IS NOT NULL
AND vote_data.vote IS NOT NULL
)
SELECT DISTINCT p1.party AS party1, p2.party AS party2
FROM party_votes p1
CROSS JOIN party_votes p2
WHERE p1.party < p2.party;
\echo ''
\echo '--- CTE 2 Analysis Summary ---'
\echo 'Purpose: Generate all unique party pairs (lower-triangle matrix)'
\echo 'Expected: DISTINCT on small result set (8-12 parties = 28-66 pairs)'
\echo 'Watch for: Expensive CROSS JOIN materialization'
\echo ''
-- ===========================================================================
-- SECTION 3: Analyze CTE 3 - alignment_metrics (Heavy Aggregation)
-- ===========================================================================
\echo ''
\echo '=========================================================================='
\echo '=== CTE 3: alignment_metrics (Vote alignment aggregation) ==='
\echo '=========================================================================='
\echo ''
EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, SUMMARY true, FORMAT text)
WITH party_votes AS (
SELECT vote_data.party,
vote_data.embedded_id_ballot_id AS ballot_id,
vote_data.vote,
vote_data.vote_date
FROM public.vote_data
WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval)
AND vote_data.party IS NOT NULL
AND vote_data.vote IS NOT NULL
), party_pairs AS (
SELECT DISTINCT p1.party AS party1, p2.party AS party2
FROM party_votes p1
CROSS JOIN party_votes p2
WHERE p1.party < p2.party
)
SELECT pp.party1,
pp.party2,
count(DISTINCT pv1.ballot_id) AS total_votes,
count(DISTINCT CASE WHEN pv1.vote = pv2.vote THEN pv1.ballot_id END) AS aligned_votes,
count(DISTINCT CASE WHEN pv1.vote = 'Ja' AND pv2.vote = 'Ja' THEN pv1.ballot_id END) AS both_yes,
count(DISTINCT CASE WHEN pv1.vote = 'Nej' AND pv2.vote = 'Nej' THEN pv1.ballot_id END) AS both_no,
count(DISTINCT CASE WHEN pv1.vote = 'Avstå' OR pv2.vote = 'Avstå' THEN pv1.ballot_id END) AS abstention_count,
min(pv1.vote_date) AS earliest_vote,
max(pv1.vote_date) AS latest_vote
FROM party_pairs pp
JOIN party_votes pv1 ON pv1.party = pp.party1
JOIN party_votes pv2 ON pv2.party = pp.party2 AND pv2.ballot_id = pv1.ballot_id
GROUP BY pp.party1, pp.party2;
\echo ''
\echo '--- CTE 3 Analysis Summary ---'
\echo 'Purpose: Calculate alignment metrics for each party pair'
\echo 'Expected: Hash joins on party, nested loop on ballot_id'
\echo 'Watch for: Multiple COUNT(DISTINCT), hash aggregation spills'
\echo ''
-- ===========================================================================
-- SECTION 4: Analyze JOIN Strategy Between CTEs
-- ===========================================================================
\echo ''
\echo '=========================================================================='
\echo '=== JOIN Analysis: party_pairs → party_votes (pv1) ==='
\echo '=========================================================================='
\echo ''
EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, FORMAT text)
WITH party_votes AS (
SELECT vote_data.party,
vote_data.embedded_id_ballot_id AS ballot_id,
vote_data.vote,
vote_data.vote_date
FROM public.vote_data
WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval)
AND vote_data.party IS NOT NULL
AND vote_data.vote IS NOT NULL
), party_pairs AS (
SELECT DISTINCT p1.party AS party1, p2.party AS party2
FROM party_votes p1 CROSS JOIN party_votes p2
WHERE p1.party < p2.party
)
SELECT pp.party1, pp.party2, COUNT(*) AS pv1_matches
FROM party_pairs pp
JOIN party_votes pv1 ON pv1.party = pp.party1
GROUP BY pp.party1, pp.party2
LIMIT 10;
\echo ''
\echo '--- First JOIN Analysis ---'
\echo 'Join type: party_pairs → party_votes (filtering by party1)'
\echo 'Expected: Hash join or nested loop'
\echo ''
\echo ''
\echo '=========================================================================='
\echo '=== JOIN Analysis: pv1 → party_votes (pv2) on ballot_id ==='
\echo '=========================================================================='
\echo ''
EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, FORMAT text)
WITH party_votes AS (
SELECT vote_data.party,
vote_data.embedded_id_ballot_id AS ballot_id,
vote_data.vote,
vote_data.vote_date
FROM public.vote_data
WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval)
AND vote_data.party IS NOT NULL
AND vote_data.vote IS NOT NULL
), party_pairs AS (
SELECT DISTINCT p1.party AS party1, p2.party AS party2
FROM party_votes p1 CROSS JOIN party_votes p2
WHERE p1.party < p2.party
)
SELECT pp.party1, pp.party2, COUNT(*) AS matching_ballots
FROM party_pairs pp
JOIN party_votes pv1 ON pv1.party = pp.party1
JOIN party_votes pv2 ON pv2.party = pp.party2 AND pv2.ballot_id = pv1.ballot_id
GROUP BY pp.party1, pp.party2
LIMIT 10;
\echo ''
\echo '--- Second JOIN Analysis ---'
\echo 'Join type: pv1 → party_votes (pv2) matching ballot_id'
\echo 'Expected: Hash join on ballot_id with party filter'
\echo 'Watch for: Nested loop if index available'
\echo ''
-- ===========================================================================
-- SECTION 5: Analyze COUNT(DISTINCT) Performance
-- ===========================================================================
\echo ''
\echo '=========================================================================='
\echo '=== Aggregation Analysis: COUNT(DISTINCT ballot_id) Performance ==='
\echo '=========================================================================='
\echo ''
EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, FORMAT text)
WITH party_votes AS (
SELECT vote_data.party,
vote_data.embedded_id_ballot_id AS ballot_id,
vote_data.vote,
vote_data.vote_date
FROM public.vote_data
WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval)
AND vote_data.party IS NOT NULL
AND vote_data.vote IS NOT NULL
)
SELECT
COUNT(DISTINCT ballot_id) AS total_ballots,
COUNT(DISTINCT CASE WHEN party = 'S' THEN ballot_id END) AS s_ballots,
COUNT(DISTINCT CASE WHEN party = 'M' THEN ballot_id END) AS m_ballots
FROM party_votes;
\echo ''
\echo '--- COUNT(DISTINCT) Analysis ---'
\echo 'Purpose: Measure distinct ballot counting overhead'
\echo 'Watch for: Hash aggregate vs. sort aggregate'
\echo ''
-- ===========================================================================
-- SECTION 6: Full Query Analysis
-- ===========================================================================
\echo ''
\echo '=========================================================================='
\echo '=== Full Query: Complete Coalition Alignment Matrix ==='
\echo '=========================================================================='
\echo ''
EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, SUMMARY true, FORMAT text)
WITH party_votes AS (
SELECT vote_data.party,
vote_data.embedded_id_ballot_id AS ballot_id,
vote_data.vote,
vote_data.vote_date
FROM public.vote_data
WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval)
AND vote_data.party IS NOT NULL
AND vote_data.vote IS NOT NULL
), party_pairs AS (
SELECT DISTINCT p1.party AS party1, p2.party AS party2
FROM party_votes p1 CROSS JOIN party_votes p2
WHERE p1.party < p2.party
), alignment_metrics AS (
SELECT pp.party1,
pp.party2,
count(DISTINCT pv1.ballot_id) AS total_votes,
count(DISTINCT CASE WHEN pv1.vote = pv2.vote THEN pv1.ballot_id END) AS aligned_votes,
count(DISTINCT CASE WHEN pv1.vote = 'Ja' AND pv2.vote = 'Ja' THEN pv1.ballot_id END) AS both_yes,
count(DISTINCT CASE WHEN pv1.vote = 'Nej' AND pv2.vote = 'Nej' THEN pv1.ballot_id END) AS both_no,
count(DISTINCT CASE WHEN pv1.vote = 'Avstå' OR pv2.vote = 'Avstå' THEN pv1.ballot_id END) AS abstention_count,
min(pv1.vote_date) AS earliest_vote,
max(pv1.vote_date) AS latest_vote
FROM party_pairs pp
JOIN party_votes pv1 ON pv1.party = pp.party1
JOIN party_votes pv2 ON pv2.party = pp.party2 AND pv2.ballot_id = pv1.ballot_id
GROUP BY pp.party1, pp.party2
)
SELECT party1, party2, total_votes, aligned_votes, both_yes, both_no,
abstention_count, earliest_vote, latest_vote,
round((aligned_votes::numeric / NULLIF(total_votes, 0) * 100), 2) AS alignment_percentage,
round((both_yes::numeric / NULLIF(total_votes, 0) * 100), 2) AS joint_support_rate,
round((both_no::numeric / NULLIF(total_votes, 0) * 100), 2) AS joint_opposition_rate,
CASE
WHEN (aligned_votes::numeric / NULLIF(total_votes, 0)) >= 0.80 THEN 'STRONG_COALITION'
WHEN (aligned_votes::numeric / NULLIF(total_votes, 0)) >= 0.60 THEN 'MODERATE_COALITION'
WHEN (aligned_votes::numeric / NULLIF(total_votes, 0)) >= 0.40 THEN 'WEAK_ALIGNMENT'
ELSE 'OPPOSITION'
END AS coalition_strength,
CASE
WHEN (aligned_votes::numeric / NULLIF(total_votes, 0)) >= 0.80 THEN 'Strong coalition partnership - consistent aligned voting'
WHEN (aligned_votes::numeric / NULLIF(total_votes, 0)) >= 0.60 THEN 'Moderate coalition alignment - generally cooperative'
WHEN (aligned_votes::numeric / NULLIF(total_votes, 0)) <= 0.40 THEN 'Opposition positioning - frequently divergent votes'
ELSE 'Neutral relationship - mixed voting patterns'
END AS coalition_assessment
FROM alignment_metrics
WHERE total_votes >= 10
ORDER BY alignment_percentage DESC, total_votes DESC;
-- ===========================================================================
-- SECTION 7: Index & Statistics Analysis
-- ===========================================================================
\echo ''
\echo '=========================================================================='
\echo '=== Index & Statistics Analysis ==='
\echo '=========================================================================='
\echo ''
\echo '--- Current Indexes on vote_data ---'
SELECT
indexname,
indexdef
FROM pg_indexes
WHERE schemaname = 'public'
AND tablename = 'vote_data'
ORDER BY indexname;
\echo ''
\echo '--- Index Usage Statistics ---'
SELECT
schemaname,
tablename,
indexname,
idx_scan AS index_scans,
idx_tup_read AS tuples_read,
idx_tup_fetch AS tuples_fetched,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE schemaname = 'public'
AND tablename = 'vote_data'
ORDER BY idx_scan DESC;
\echo ''
\echo '--- Table Statistics ---'
SELECT
relname AS table_name,
n_live_tup AS row_count,
n_dead_tup AS dead_rows,
pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
pg_size_pretty(pg_relation_size(relid)) AS table_size,
pg_size_pretty(pg_total_relation_size(relid) - pg_relation_size(relid)) AS index_size,
last_vacuum,
last_autovacuum,
last_analyze,
last_autoanalyze
FROM pg_stat_user_tables
WHERE schemaname = 'public'
AND relname = 'vote_data';
\echo ''
\echo '--- Column Statistics (for optimizer) ---'
SELECT
schemaname,
tablename,
attname AS column_name,
n_distinct,
correlation,
most_common_vals,
most_common_freqs
FROM pg_stats
WHERE schemaname = 'public'
AND tablename = 'vote_data'
AND attname IN ('party', 'vote', 'vote_date', 'embedded_id_ballot_id')
ORDER BY attname;
-- ===========================================================================
-- SECTION 8: Recommendations
-- ===========================================================================
\echo ''
\echo '=========================================================================='
\echo '=== Optimization Recommendations ==='
\echo '=========================================================================='
\echo ''
\echo 'Based on the analysis above, consider:'
\echo ''
\echo '1. Missing Indexes (if sequential scans detected):'
\echo ' CREATE INDEX idx_vote_data_coalition_filter'
\echo ' ON vote_data (vote_date, party, vote)'
\echo ' WHERE vote_date >= (CURRENT_DATE - INTERVAL ''5 years'')'
\echo ' AND party IS NOT NULL AND vote IS NOT NULL;'
\echo ''
\echo ' CREATE INDEX idx_vote_data_party_ballot'
\echo ' ON vote_data (party, embedded_id_ballot_id, vote);'
\echo ''
\echo '2. Materialized View (if query runs frequently):'
\echo ' CREATE MATERIALIZED VIEW mv_coalition_alignment_matrix AS'
\echo ' SELECT * FROM view_riksdagen_coalition_alignment_matrix;'
\echo ''
\echo '3. Statistics Update (if estimates are wrong):'
\echo ' ALTER TABLE vote_data ALTER COLUMN party SET STATISTICS 1000;'
\echo ' ALTER TABLE vote_data ALTER COLUMN embedded_id_ballot_id SET STATISTICS 1000;'
\echo ' ANALYZE vote_data;'
\echo ''
\echo '4. Query Tuning:'
\echo ' - Consider work_mem increase for hash aggregations'
\echo ' - Evaluate if DISTINCT can be replaced with GROUP BY'
\echo ' - Test if pre-filtering party_pairs improves performance'
\echo ''
\echo '=========================================================================='
\echo '=== Analysis Complete ==='
\echo '=========================================================================='
|