File size: 15,881 Bytes
5a81b95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
-- Enable detailed query analysis
SET track_io_timing = ON;
SET enable_timing = ON;

\echo '=========================================================================='
\echo '=== Coalition Alignment Matrix - Detailed Subquery Analysis          ==='
\echo '=========================================================================='
\echo ''
\echo 'This analysis breaks down the coalition view into individual CTEs'
\echo 'to identify performance bottlenecks and optimization opportunities.'
\echo ''

-- ===========================================================================
-- SECTION 1: Analyze CTE 1 - party_votes (Base Data Filter)
-- ===========================================================================
\echo ''
\echo '=========================================================================='
\echo '=== CTE 1: party_votes (5-year vote filter)                         ==='
\echo '=========================================================================='
\echo ''

EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, SUMMARY true, FORMAT text)
SELECT vote_data.party,
    vote_data.embedded_id_ballot_id AS ballot_id,
    vote_data.vote,
    vote_data.vote_date
FROM public.vote_data
WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval)
  AND vote_data.party IS NOT NULL
  AND vote_data.vote IS NOT NULL;

\echo ''
\echo '--- CTE 1 Analysis Summary ---'
\echo 'Purpose: Filter vote_data for last 5 years with non-null party/vote'
\echo 'Expected: Should use index on vote_date + party + vote'
\echo 'Watch for: Sequential scans, missing indexes'
\echo ''

-- ===========================================================================
-- SECTION 2: Analyze CTE 2 - party_pairs (Cartesian Product)
-- ===========================================================================
\echo ''
\echo '=========================================================================='
\echo '=== CTE 2: party_pairs (All party combinations)                     ==='
\echo '=========================================================================='
\echo ''

EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, SUMMARY true, FORMAT text)
WITH party_votes AS (
    SELECT vote_data.party,
        vote_data.embedded_id_ballot_id AS ballot_id,
        vote_data.vote,
        vote_data.vote_date
    FROM public.vote_data
    WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval)
      AND vote_data.party IS NOT NULL
      AND vote_data.vote IS NOT NULL
)
SELECT DISTINCT p1.party AS party1, p2.party AS party2
FROM party_votes p1
CROSS JOIN party_votes p2
WHERE p1.party < p2.party;

\echo ''
\echo '--- CTE 2 Analysis Summary ---'
\echo 'Purpose: Generate all unique party pairs (lower-triangle matrix)'
\echo 'Expected: DISTINCT on small result set (8-12 parties = 28-66 pairs)'
\echo 'Watch for: Expensive CROSS JOIN materialization'
\echo ''

-- ===========================================================================
-- SECTION 3: Analyze CTE 3 - alignment_metrics (Heavy Aggregation)
-- ===========================================================================
\echo ''
\echo '=========================================================================='
\echo '=== CTE 3: alignment_metrics (Vote alignment aggregation)            ==='
\echo '=========================================================================='
\echo ''

EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, SUMMARY true, FORMAT text)
WITH party_votes AS (
    SELECT vote_data.party,
        vote_data.embedded_id_ballot_id AS ballot_id,
        vote_data.vote,
        vote_data.vote_date
    FROM public.vote_data
    WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval)
      AND vote_data.party IS NOT NULL
      AND vote_data.vote IS NOT NULL
), party_pairs AS (
    SELECT DISTINCT p1.party AS party1, p2.party AS party2
    FROM party_votes p1
    CROSS JOIN party_votes p2
    WHERE p1.party < p2.party
)
SELECT pp.party1,
    pp.party2,
    count(DISTINCT pv1.ballot_id) AS total_votes,
    count(DISTINCT CASE WHEN pv1.vote = pv2.vote THEN pv1.ballot_id END) AS aligned_votes,
    count(DISTINCT CASE WHEN pv1.vote = 'Ja' AND pv2.vote = 'Ja' THEN pv1.ballot_id END) AS both_yes,
    count(DISTINCT CASE WHEN pv1.vote = 'Nej' AND pv2.vote = 'Nej' THEN pv1.ballot_id END) AS both_no,
    count(DISTINCT CASE WHEN pv1.vote = 'Avstå' OR pv2.vote = 'Avstå' THEN pv1.ballot_id END) AS abstention_count,
    min(pv1.vote_date) AS earliest_vote,
    max(pv1.vote_date) AS latest_vote
FROM party_pairs pp
JOIN party_votes pv1 ON pv1.party = pp.party1
JOIN party_votes pv2 ON pv2.party = pp.party2 AND pv2.ballot_id = pv1.ballot_id
GROUP BY pp.party1, pp.party2;

\echo ''
\echo '--- CTE 3 Analysis Summary ---'
\echo 'Purpose: Calculate alignment metrics for each party pair'
\echo 'Expected: Hash joins on party, nested loop on ballot_id'
\echo 'Watch for: Multiple COUNT(DISTINCT), hash aggregation spills'
\echo ''

-- ===========================================================================
-- SECTION 4: Analyze JOIN Strategy Between CTEs
-- ===========================================================================
\echo ''
\echo '=========================================================================='
\echo '=== JOIN Analysis: party_pairs → party_votes (pv1)                   ==='
\echo '=========================================================================='
\echo ''

EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, FORMAT text)
WITH party_votes AS (
    SELECT vote_data.party,
        vote_data.embedded_id_ballot_id AS ballot_id,
        vote_data.vote,
        vote_data.vote_date
    FROM public.vote_data
    WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval)
      AND vote_data.party IS NOT NULL
      AND vote_data.vote IS NOT NULL
), party_pairs AS (
    SELECT DISTINCT p1.party AS party1, p2.party AS party2
    FROM party_votes p1 CROSS JOIN party_votes p2
    WHERE p1.party < p2.party
)
SELECT pp.party1, pp.party2, COUNT(*) AS pv1_matches
FROM party_pairs pp
JOIN party_votes pv1 ON pv1.party = pp.party1
GROUP BY pp.party1, pp.party2
LIMIT 10;

\echo ''
\echo '--- First JOIN Analysis ---'
\echo 'Join type: party_pairs → party_votes (filtering by party1)'
\echo 'Expected: Hash join or nested loop'
\echo ''

\echo ''
\echo '=========================================================================='
\echo '=== JOIN Analysis: pv1 → party_votes (pv2) on ballot_id             ==='
\echo '=========================================================================='
\echo ''

EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, FORMAT text)
WITH party_votes AS (
    SELECT vote_data.party,
        vote_data.embedded_id_ballot_id AS ballot_id,
        vote_data.vote,
        vote_data.vote_date
    FROM public.vote_data
    WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval)
      AND vote_data.party IS NOT NULL
      AND vote_data.vote IS NOT NULL
), party_pairs AS (
    SELECT DISTINCT p1.party AS party1, p2.party AS party2
    FROM party_votes p1 CROSS JOIN party_votes p2
    WHERE p1.party < p2.party
)
SELECT pp.party1, pp.party2, COUNT(*) AS matching_ballots
FROM party_pairs pp
JOIN party_votes pv1 ON pv1.party = pp.party1
JOIN party_votes pv2 ON pv2.party = pp.party2 AND pv2.ballot_id = pv1.ballot_id
GROUP BY pp.party1, pp.party2
LIMIT 10;

\echo ''
\echo '--- Second JOIN Analysis ---'
\echo 'Join type: pv1 → party_votes (pv2) matching ballot_id'
\echo 'Expected: Hash join on ballot_id with party filter'
\echo 'Watch for: Nested loop if index available'
\echo ''

-- ===========================================================================
-- SECTION 5: Analyze COUNT(DISTINCT) Performance
-- ===========================================================================
\echo ''
\echo '=========================================================================='
\echo '=== Aggregation Analysis: COUNT(DISTINCT ballot_id) Performance     ==='
\echo '=========================================================================='
\echo ''

EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, FORMAT text)
WITH party_votes AS (
    SELECT vote_data.party,
        vote_data.embedded_id_ballot_id AS ballot_id,
        vote_data.vote,
        vote_data.vote_date
    FROM public.vote_data
    WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval)
      AND vote_data.party IS NOT NULL
      AND vote_data.vote IS NOT NULL
)
SELECT 
    COUNT(DISTINCT ballot_id) AS total_ballots,
    COUNT(DISTINCT CASE WHEN party = 'S' THEN ballot_id END) AS s_ballots,
    COUNT(DISTINCT CASE WHEN party = 'M' THEN ballot_id END) AS m_ballots
FROM party_votes;

\echo ''
\echo '--- COUNT(DISTINCT) Analysis ---'
\echo 'Purpose: Measure distinct ballot counting overhead'
\echo 'Watch for: Hash aggregate vs. sort aggregate'
\echo ''

-- ===========================================================================
-- SECTION 6: Full Query Analysis
-- ===========================================================================
\echo ''
\echo '=========================================================================='
\echo '=== Full Query: Complete Coalition Alignment Matrix                 ==='
\echo '=========================================================================='
\echo ''

EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, SUMMARY true, FORMAT text)
WITH party_votes AS (
    SELECT vote_data.party,
        vote_data.embedded_id_ballot_id AS ballot_id,
        vote_data.vote,
        vote_data.vote_date
    FROM public.vote_data
    WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval)
      AND vote_data.party IS NOT NULL
      AND vote_data.vote IS NOT NULL
), party_pairs AS (
    SELECT DISTINCT p1.party AS party1, p2.party AS party2
    FROM party_votes p1 CROSS JOIN party_votes p2
    WHERE p1.party < p2.party
), alignment_metrics AS (
    SELECT pp.party1,
        pp.party2,
        count(DISTINCT pv1.ballot_id) AS total_votes,
        count(DISTINCT CASE WHEN pv1.vote = pv2.vote THEN pv1.ballot_id END) AS aligned_votes,
        count(DISTINCT CASE WHEN pv1.vote = 'Ja' AND pv2.vote = 'Ja' THEN pv1.ballot_id END) AS both_yes,
        count(DISTINCT CASE WHEN pv1.vote = 'Nej' AND pv2.vote = 'Nej' THEN pv1.ballot_id END) AS both_no,
        count(DISTINCT CASE WHEN pv1.vote = 'Avstå' OR pv2.vote = 'Avstå' THEN pv1.ballot_id END) AS abstention_count,
        min(pv1.vote_date) AS earliest_vote,
        max(pv1.vote_date) AS latest_vote
    FROM party_pairs pp
    JOIN party_votes pv1 ON pv1.party = pp.party1
    JOIN party_votes pv2 ON pv2.party = pp.party2 AND pv2.ballot_id = pv1.ballot_id
    GROUP BY pp.party1, pp.party2
)
SELECT party1, party2, total_votes, aligned_votes, both_yes, both_no,
    abstention_count, earliest_vote, latest_vote,
    round((aligned_votes::numeric / NULLIF(total_votes, 0) * 100), 2) AS alignment_percentage,
    round((both_yes::numeric / NULLIF(total_votes, 0) * 100), 2) AS joint_support_rate,
    round((both_no::numeric / NULLIF(total_votes, 0) * 100), 2) AS joint_opposition_rate,
    CASE
        WHEN (aligned_votes::numeric / NULLIF(total_votes, 0)) >= 0.80 THEN 'STRONG_COALITION'
        WHEN (aligned_votes::numeric / NULLIF(total_votes, 0)) >= 0.60 THEN 'MODERATE_COALITION'
        WHEN (aligned_votes::numeric / NULLIF(total_votes, 0)) >= 0.40 THEN 'WEAK_ALIGNMENT'
        ELSE 'OPPOSITION'
    END AS coalition_strength,
    CASE
        WHEN (aligned_votes::numeric / NULLIF(total_votes, 0)) >= 0.80 THEN 'Strong coalition partnership - consistent aligned voting'
        WHEN (aligned_votes::numeric / NULLIF(total_votes, 0)) >= 0.60 THEN 'Moderate coalition alignment - generally cooperative'
        WHEN (aligned_votes::numeric / NULLIF(total_votes, 0)) <= 0.40 THEN 'Opposition positioning - frequently divergent votes'
        ELSE 'Neutral relationship - mixed voting patterns'
    END AS coalition_assessment
FROM alignment_metrics
WHERE total_votes >= 10
ORDER BY alignment_percentage DESC, total_votes DESC;

-- ===========================================================================
-- SECTION 7: Index & Statistics Analysis
-- ===========================================================================
\echo ''
\echo '=========================================================================='
\echo '=== Index & Statistics Analysis                                      ==='
\echo '=========================================================================='
\echo ''

\echo '--- Current Indexes on vote_data ---'
SELECT 
    indexname,
    indexdef
FROM pg_indexes
WHERE schemaname = 'public'
AND tablename = 'vote_data'
ORDER BY indexname;

\echo ''
\echo '--- Index Usage Statistics ---'
SELECT
    schemaname,
    tablename,
    indexname,
    idx_scan AS index_scans,
    idx_tup_read AS tuples_read,
    idx_tup_fetch AS tuples_fetched,
    pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE schemaname = 'public'
AND tablename = 'vote_data'
ORDER BY idx_scan DESC;

\echo ''
\echo '--- Table Statistics ---'
SELECT 
    relname AS table_name,
    n_live_tup AS row_count,
    n_dead_tup AS dead_rows,
    pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
    pg_size_pretty(pg_relation_size(relid)) AS table_size,
    pg_size_pretty(pg_total_relation_size(relid) - pg_relation_size(relid)) AS index_size,
    last_vacuum,
    last_autovacuum,
    last_analyze,
    last_autoanalyze
FROM pg_stat_user_tables
WHERE schemaname = 'public'
AND relname = 'vote_data';

\echo ''
\echo '--- Column Statistics (for optimizer) ---'
SELECT 
    schemaname,
    tablename,
    attname AS column_name,
    n_distinct,
    correlation,
    most_common_vals,
    most_common_freqs
FROM pg_stats
WHERE schemaname = 'public'
AND tablename = 'vote_data'
AND attname IN ('party', 'vote', 'vote_date', 'embedded_id_ballot_id')
ORDER BY attname;

-- ===========================================================================
-- SECTION 8: Recommendations
-- ===========================================================================
\echo ''
\echo '=========================================================================='
\echo '=== Optimization Recommendations                                     ==='
\echo '=========================================================================='
\echo ''
\echo 'Based on the analysis above, consider:'
\echo ''
\echo '1. Missing Indexes (if sequential scans detected):'
\echo '   CREATE INDEX idx_vote_data_coalition_filter'
\echo '   ON vote_data (vote_date, party, vote)'
\echo '   WHERE vote_date >= (CURRENT_DATE - INTERVAL ''5 years'')'
\echo '     AND party IS NOT NULL AND vote IS NOT NULL;'
\echo ''
\echo '   CREATE INDEX idx_vote_data_party_ballot'
\echo '   ON vote_data (party, embedded_id_ballot_id, vote);'
\echo ''
\echo '2. Materialized View (if query runs frequently):'
\echo '   CREATE MATERIALIZED VIEW mv_coalition_alignment_matrix AS'
\echo '   SELECT * FROM view_riksdagen_coalition_alignment_matrix;'
\echo ''
\echo '3. Statistics Update (if estimates are wrong):'
\echo '   ALTER TABLE vote_data ALTER COLUMN party SET STATISTICS 1000;'
\echo '   ALTER TABLE vote_data ALTER COLUMN embedded_id_ballot_id SET STATISTICS 1000;'
\echo '   ANALYZE vote_data;'
\echo ''
\echo '4. Query Tuning:'
\echo '   - Consider work_mem increase for hash aggregations'
\echo '   - Evaluate if DISTINCT can be replaced with GROUP BY'
\echo '   - Test if pre-filtering party_pairs improves performance'
\echo ''
\echo '=========================================================================='
\echo '=== Analysis Complete                                                ==='
\echo '=========================================================================='