File size: 6,247 Bytes
61d29fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
#!/bin/bash
set -e  # Exit on error

# Setup OpenStates PostgreSQL Database
# Loads the 9.8GB legislative data dump from Open States
# Contains: 50+ tables with bills, legislators, votes, committees for all 50 states

# βœ… REQUIRES TWO FILES:
# 1. Schema file: Creates tables, indexes, constraints (~50 MB)
# 2. Data file: Contains all the actual data (~10 GB)
#
# Download both with:
#   python scripts/bulk_legislative_download.py --postgres --month 2026-04
#
# This script restores schema first, then data.

echo "πŸ›οΈ  OpenStates Database Setup"
echo "===================================="
echo ""

# Configuration
DB_NAME="openstates"
DB_USER="postgres"
DB_PASSWORD="password"
DB_HOST="localhost"
DB_PORT="5432"  # PostgreSQL container (charming_mestorf)
SCHEMA_FILE="data/cache/legislation_bulk/postgres/2026-04-schema.pgdump"
DATA_FILE="data/cache/legislation_bulk/postgres/2026-04-public.pgdump"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Check if schema file exists
if [ ! -f "$SCHEMA_FILE" ]; then
    echo -e "${RED}❌ Error: Schema file not found at $SCHEMA_FILE${NC}"
    echo ""
    echo "Download both schema and data files with:"
    echo "  python scripts/bulk_legislative_download.py --postgres --month 2026-04"
    exit 1
fi

echo -e "${GREEN}βœ“${NC} Found schema file: $SCHEMA_FILE"
SCHEMA_SIZE=$(du -h "$SCHEMA_FILE" | cut -f1)
echo -e "  Size: $SCHEMA_SIZE"
echo ""

# Check if data file exists
if [ ! -f "$DATA_FILE" ]; then
    echo -e "${RED}❌ Error: Data file not found at $DATA_FILE${NC}"
    echo ""
    echo "Download both schema and data files with:"
    echo "  python scripts/bulk_legislative_download.py --postgres --month 2026-04"
    exit 1
fi

echo -e "${GREEN}βœ“${NC} Found data file: $DATA_FILE"
DATA_SIZE=$(du -h "$DATA_FILE" | cut -f1)
echo -e "  Size: $DATA_SIZE"
echo ""

# Check if PostgreSQL is running
if ! pg_isready -h $DB_HOST -p $DB_PORT -U $DB_USER > /dev/null 2>&1; then
    echo -e "${RED}❌ Error: PostgreSQL 17 is not running on port $DB_PORT${NC}"
    echo ""
    echo "Start PostgreSQL 17 container:"
    echo "  docker start openstates-db"
    exit 1
fi

echo -e "${GREEN}βœ“${NC} PostgreSQL is running"
echo ""

# Check if database already exists
if PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -lqt | cut -d \| -f 1 | grep -qw $DB_NAME; then
    echo -e "${YELLOW}⚠️  Database '$DB_NAME' already exists${NC}"
    echo ""
    read -p "Drop and recreate? (y/N): " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        echo "Dropping existing database..."
        PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -c "DROP DATABASE $DB_NAME;"
        echo -e "${GREEN}βœ“${NC} Database dropped"
    else
        echo "Skipping database creation. Will attempt restore into existing database."
        echo ""
    fi
fi

# Create database if it doesn't exist
if ! PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -lqt | cut -d \| -f 1 | grep -qw $DB_NAME; then
    echo "Creating database '$DB_NAME'..."
    PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -c "CREATE DATABASE $DB_NAME;"
    echo -e "${GREEN}βœ“${NC} Database created"
    echo ""
fi

# Restore the schema and data
echo "πŸ”„ Restoring OpenStates database (2-step process)..."
echo ""

# Use pg_restore with verbose output
# --clean: Drop existing objects before recreating
# --if-exists: Don't error if objects don't exist
# --no-owner: Don't set ownership
# --no-acl: Don't set access privileges
export PGPASSWORD=$DB_PASSWORD

# Step 1: Restore schema (creates tables, indexes, constraints)
echo "πŸ“‹ Step 1/2: Restoring schema (creates all tables)..."
echo "Started at $(date)..."
pg_restore \
    -h $DB_HOST \
    -p $DB_PORT \
    -U $DB_USER \
    -d $DB_NAME \
    --clean \
    --if-exists \
    --no-owner \
    --no-acl \
    --verbose \
    "$SCHEMA_FILE" 2>&1 | while read line; do
        # Only show important messages
        if echo "$line" | grep -qE "^processing|^creating|^ERROR|^WARNING"; then
            echo "  $line"
        fi
    done

echo ""
echo -e "${GREEN}βœ“${NC} Schema restored at $(date)"
echo ""

# Step 2: Restore data (this takes longer, ~10-15 minutes)
echo "πŸ“Š Step 2/2: Restoring data (this will take 10-15 minutes for 9.8GB)..."
echo "Started at $(date)..."
pg_restore \
    -h $DB_HOST \
    -p $DB_PORT \
    -U $DB_USER \
    -d $DB_NAME \
    --data-only \
    --no-owner \
    --no-acl \
    --disable-triggers \
    --verbose \
    "$DATA_FILE" 2>&1 | while read line; do
        # Only show important messages
        if echo "$line" | grep -qE "^processing|^restoring data|^ERROR|^WARNING"; then
            echo "  $line"
        fi
    done

echo ""
echo "Data restore completed at $(date)"
echo ""

# Verify the restore
echo "πŸ“Š Verifying database contents..."
echo ""

TABLE_COUNT=$(PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME -t -c "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public';")

echo -e "${GREEN}βœ“${NC} Database restored successfully"
echo -e "  Tables created: $TABLE_COUNT"
echo ""

# Show sample table counts
echo "Sample table contents:"
PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME << 'SQL'
SELECT 
    tablename,
    (xpath('//row/cnt/text()', query_to_xml(format('SELECT COUNT(*) as cnt FROM %I', tablename), false, true, '')))[1]::text::int AS row_count
FROM pg_tables 
WHERE schemaname = 'public' 
    AND tablename IN ('opencivicdata_person', 'opencivicdata_bill', 'opencivicdata_voteevent', 'opencivicdata_organization')
ORDER BY row_count DESC;
SQL

echo ""
echo "======================================"
echo -e "${GREEN}βœ… OpenStates database setup complete!${NC}"
echo "======================================"
echo ""
echo "Database: $DB_NAME"
echo "Host: $DB_HOST:$DB_PORT (PostgreSQL 17)"
echo ""
echo "Connect with:"
echo "  PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME"
echo ""
echo "Or from Python:"
echo "  import psycopg2"
echo "  conn = psycopg2.connect(host='$DB_HOST', port=$DB_PORT, database='$DB_NAME', user='$DB_USER', password='$DB_PASSWORD')"
echo ""