Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
File size: 6,247 Bytes
61d29fc | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 | #!/bin/bash
set -e # Exit on error
# Setup OpenStates PostgreSQL Database
# Loads the 9.8GB legislative data dump from Open States
# Contains: 50+ tables with bills, legislators, votes, committees for all 50 states
# β
REQUIRES TWO FILES:
# 1. Schema file: Creates tables, indexes, constraints (~50 MB)
# 2. Data file: Contains all the actual data (~10 GB)
#
# Download both with:
# python scripts/bulk_legislative_download.py --postgres --month 2026-04
#
# This script restores schema first, then data.
echo "ποΈ OpenStates Database Setup"
echo "===================================="
echo ""
# Configuration
DB_NAME="openstates"
DB_USER="postgres"
DB_PASSWORD="password"
DB_HOST="localhost"
DB_PORT="5432" # PostgreSQL container (charming_mestorf)
SCHEMA_FILE="data/cache/legislation_bulk/postgres/2026-04-schema.pgdump"
DATA_FILE="data/cache/legislation_bulk/postgres/2026-04-public.pgdump"
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Check if schema file exists
if [ ! -f "$SCHEMA_FILE" ]; then
echo -e "${RED}β Error: Schema file not found at $SCHEMA_FILE${NC}"
echo ""
echo "Download both schema and data files with:"
echo " python scripts/bulk_legislative_download.py --postgres --month 2026-04"
exit 1
fi
echo -e "${GREEN}β${NC} Found schema file: $SCHEMA_FILE"
SCHEMA_SIZE=$(du -h "$SCHEMA_FILE" | cut -f1)
echo -e " Size: $SCHEMA_SIZE"
echo ""
# Check if data file exists
if [ ! -f "$DATA_FILE" ]; then
echo -e "${RED}β Error: Data file not found at $DATA_FILE${NC}"
echo ""
echo "Download both schema and data files with:"
echo " python scripts/bulk_legislative_download.py --postgres --month 2026-04"
exit 1
fi
echo -e "${GREEN}β${NC} Found data file: $DATA_FILE"
DATA_SIZE=$(du -h "$DATA_FILE" | cut -f1)
echo -e " Size: $DATA_SIZE"
echo ""
# Check if PostgreSQL is running
if ! pg_isready -h $DB_HOST -p $DB_PORT -U $DB_USER > /dev/null 2>&1; then
echo -e "${RED}β Error: PostgreSQL 17 is not running on port $DB_PORT${NC}"
echo ""
echo "Start PostgreSQL 17 container:"
echo " docker start openstates-db"
exit 1
fi
echo -e "${GREEN}β${NC} PostgreSQL is running"
echo ""
# Check if database already exists
if PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -lqt | cut -d \| -f 1 | grep -qw $DB_NAME; then
echo -e "${YELLOW}β οΈ Database '$DB_NAME' already exists${NC}"
echo ""
read -p "Drop and recreate? (y/N): " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
echo "Dropping existing database..."
PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -c "DROP DATABASE $DB_NAME;"
echo -e "${GREEN}β${NC} Database dropped"
else
echo "Skipping database creation. Will attempt restore into existing database."
echo ""
fi
fi
# Create database if it doesn't exist
if ! PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -lqt | cut -d \| -f 1 | grep -qw $DB_NAME; then
echo "Creating database '$DB_NAME'..."
PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -c "CREATE DATABASE $DB_NAME;"
echo -e "${GREEN}β${NC} Database created"
echo ""
fi
# Restore the schema and data
echo "π Restoring OpenStates database (2-step process)..."
echo ""
# Use pg_restore with verbose output
# --clean: Drop existing objects before recreating
# --if-exists: Don't error if objects don't exist
# --no-owner: Don't set ownership
# --no-acl: Don't set access privileges
export PGPASSWORD=$DB_PASSWORD
# Step 1: Restore schema (creates tables, indexes, constraints)
echo "π Step 1/2: Restoring schema (creates all tables)..."
echo "Started at $(date)..."
pg_restore \
-h $DB_HOST \
-p $DB_PORT \
-U $DB_USER \
-d $DB_NAME \
--clean \
--if-exists \
--no-owner \
--no-acl \
--verbose \
"$SCHEMA_FILE" 2>&1 | while read line; do
# Only show important messages
if echo "$line" | grep -qE "^processing|^creating|^ERROR|^WARNING"; then
echo " $line"
fi
done
echo ""
echo -e "${GREEN}β${NC} Schema restored at $(date)"
echo ""
# Step 2: Restore data (this takes longer, ~10-15 minutes)
echo "π Step 2/2: Restoring data (this will take 10-15 minutes for 9.8GB)..."
echo "Started at $(date)..."
pg_restore \
-h $DB_HOST \
-p $DB_PORT \
-U $DB_USER \
-d $DB_NAME \
--data-only \
--no-owner \
--no-acl \
--disable-triggers \
--verbose \
"$DATA_FILE" 2>&1 | while read line; do
# Only show important messages
if echo "$line" | grep -qE "^processing|^restoring data|^ERROR|^WARNING"; then
echo " $line"
fi
done
echo ""
echo "Data restore completed at $(date)"
echo ""
# Verify the restore
echo "π Verifying database contents..."
echo ""
TABLE_COUNT=$(PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME -t -c "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public';")
echo -e "${GREEN}β${NC} Database restored successfully"
echo -e " Tables created: $TABLE_COUNT"
echo ""
# Show sample table counts
echo "Sample table contents:"
PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME << 'SQL'
SELECT
tablename,
(xpath('//row/cnt/text()', query_to_xml(format('SELECT COUNT(*) as cnt FROM %I', tablename), false, true, '')))[1]::text::int AS row_count
FROM pg_tables
WHERE schemaname = 'public'
AND tablename IN ('opencivicdata_person', 'opencivicdata_bill', 'opencivicdata_voteevent', 'opencivicdata_organization')
ORDER BY row_count DESC;
SQL
echo ""
echo "======================================"
echo -e "${GREEN}β
OpenStates database setup complete!${NC}"
echo "======================================"
echo ""
echo "Database: $DB_NAME"
echo "Host: $DB_HOST:$DB_PORT (PostgreSQL 17)"
echo ""
echo "Connect with:"
echo " PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME"
echo ""
echo "Or from Python:"
echo " import psycopg2"
echo " conn = psycopg2.connect(host='$DB_HOST', port=$DB_PORT, database='$DB_NAME', user='$DB_USER', password='$DB_PASSWORD')"
echo ""
|