Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
| set -e # Exit on error | |
| # Setup OpenStates PostgreSQL Database | |
| # Loads the 9.8GB legislative data dump from Open States | |
| # Contains: 50+ tables with bills, legislators, votes, committees for all 50 states | |
| # β REQUIRES TWO FILES: | |
| # 1. Schema file: Creates tables, indexes, constraints (~50 MB) | |
| # 2. Data file: Contains all the actual data (~10 GB) | |
| # | |
| # Download both with: | |
| # python scripts/bulk_legislative_download.py --postgres --month 2026-04 | |
| # | |
| # This script restores schema first, then data. | |
| echo "ποΈ OpenStates Database Setup" | |
| echo "====================================" | |
| echo "" | |
| # Configuration | |
| DB_NAME="openstates" | |
| DB_USER="postgres" | |
| DB_PASSWORD="password" | |
| DB_HOST="localhost" | |
| DB_PORT="5432" # PostgreSQL container (charming_mestorf) | |
| SCHEMA_FILE="data/cache/legislation_bulk/postgres/2026-04-schema.pgdump" | |
| DATA_FILE="data/cache/legislation_bulk/postgres/2026-04-public.pgdump" | |
| # Colors for output | |
| RED='\033[0;31m' | |
| GREEN='\033[0;32m' | |
| YELLOW='\033[1;33m' | |
| NC='\033[0m' # No Color | |
| # Check if schema file exists | |
| if [ ! -f "$SCHEMA_FILE" ]; then | |
| echo -e "${RED}β Error: Schema file not found at $SCHEMA_FILE${NC}" | |
| echo "" | |
| echo "Download both schema and data files with:" | |
| echo " python scripts/bulk_legislative_download.py --postgres --month 2026-04" | |
| exit 1 | |
| fi | |
| echo -e "${GREEN}β${NC} Found schema file: $SCHEMA_FILE" | |
| SCHEMA_SIZE=$(du -h "$SCHEMA_FILE" | cut -f1) | |
| echo -e " Size: $SCHEMA_SIZE" | |
| echo "" | |
| # Check if data file exists | |
| if [ ! -f "$DATA_FILE" ]; then | |
| echo -e "${RED}β Error: Data file not found at $DATA_FILE${NC}" | |
| echo "" | |
| echo "Download both schema and data files with:" | |
| echo " python scripts/bulk_legislative_download.py --postgres --month 2026-04" | |
| exit 1 | |
| fi | |
| echo -e "${GREEN}β${NC} Found data file: $DATA_FILE" | |
| DATA_SIZE=$(du -h "$DATA_FILE" | cut -f1) | |
| echo -e " Size: $DATA_SIZE" | |
| echo "" | |
| # Check if PostgreSQL is running | |
| if ! pg_isready -h $DB_HOST -p $DB_PORT -U $DB_USER > /dev/null 2>&1; then | |
| echo -e "${RED}β Error: PostgreSQL 17 is not running on port $DB_PORT${NC}" | |
| echo "" | |
| echo "Start PostgreSQL 17 container:" | |
| echo " docker start openstates-db" | |
| exit 1 | |
| fi | |
| echo -e "${GREEN}β${NC} PostgreSQL is running" | |
| echo "" | |
| # Check if database already exists | |
| if PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -lqt | cut -d \| -f 1 | grep -qw $DB_NAME; then | |
| echo -e "${YELLOW}β οΈ Database '$DB_NAME' already exists${NC}" | |
| echo "" | |
| read -p "Drop and recreate? (y/N): " -n 1 -r | |
| echo | |
| if [[ $REPLY =~ ^[Yy]$ ]]; then | |
| echo "Dropping existing database..." | |
| PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -c "DROP DATABASE $DB_NAME;" | |
| echo -e "${GREEN}β${NC} Database dropped" | |
| else | |
| echo "Skipping database creation. Will attempt restore into existing database." | |
| echo "" | |
| fi | |
| fi | |
| # Create database if it doesn't exist | |
| if ! PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -lqt | cut -d \| -f 1 | grep -qw $DB_NAME; then | |
| echo "Creating database '$DB_NAME'..." | |
| PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -c "CREATE DATABASE $DB_NAME;" | |
| echo -e "${GREEN}β${NC} Database created" | |
| echo "" | |
| fi | |
| # Restore the schema and data | |
| echo "π Restoring OpenStates database (2-step process)..." | |
| echo "" | |
| # Use pg_restore with verbose output | |
| # --clean: Drop existing objects before recreating | |
| # --if-exists: Don't error if objects don't exist | |
| # --no-owner: Don't set ownership | |
| # --no-acl: Don't set access privileges | |
| export PGPASSWORD=$DB_PASSWORD | |
| # Step 1: Restore schema (creates tables, indexes, constraints) | |
| echo "π Step 1/2: Restoring schema (creates all tables)..." | |
| echo "Started at $(date)..." | |
| pg_restore \ | |
| -h $DB_HOST \ | |
| -p $DB_PORT \ | |
| -U $DB_USER \ | |
| -d $DB_NAME \ | |
| --clean \ | |
| --if-exists \ | |
| --no-owner \ | |
| --no-acl \ | |
| --verbose \ | |
| "$SCHEMA_FILE" 2>&1 | while read line; do | |
| # Only show important messages | |
| if echo "$line" | grep -qE "^processing|^creating|^ERROR|^WARNING"; then | |
| echo " $line" | |
| fi | |
| done | |
| echo "" | |
| echo -e "${GREEN}β${NC} Schema restored at $(date)" | |
| echo "" | |
| # Step 2: Restore data (this takes longer, ~10-15 minutes) | |
| echo "π Step 2/2: Restoring data (this will take 10-15 minutes for 9.8GB)..." | |
| echo "Started at $(date)..." | |
| pg_restore \ | |
| -h $DB_HOST \ | |
| -p $DB_PORT \ | |
| -U $DB_USER \ | |
| -d $DB_NAME \ | |
| --data-only \ | |
| --no-owner \ | |
| --no-acl \ | |
| --disable-triggers \ | |
| --verbose \ | |
| "$DATA_FILE" 2>&1 | while read line; do | |
| # Only show important messages | |
| if echo "$line" | grep -qE "^processing|^restoring data|^ERROR|^WARNING"; then | |
| echo " $line" | |
| fi | |
| done | |
| echo "" | |
| echo "Data restore completed at $(date)" | |
| echo "" | |
| # Verify the restore | |
| echo "π Verifying database contents..." | |
| echo "" | |
| TABLE_COUNT=$(PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME -t -c "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public';") | |
| echo -e "${GREEN}β${NC} Database restored successfully" | |
| echo -e " Tables created: $TABLE_COUNT" | |
| echo "" | |
| # Show sample table counts | |
| echo "Sample table contents:" | |
| PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME << 'SQL' | |
| SELECT | |
| tablename, | |
| (xpath('//row/cnt/text()', query_to_xml(format('SELECT COUNT(*) as cnt FROM %I', tablename), false, true, '')))[1]::text::int AS row_count | |
| FROM pg_tables | |
| WHERE schemaname = 'public' | |
| AND tablename IN ('opencivicdata_person', 'opencivicdata_bill', 'opencivicdata_voteevent', 'opencivicdata_organization') | |
| ORDER BY row_count DESC; | |
| SQL | |
| echo "" | |
| echo "======================================" | |
| echo -e "${GREEN}β OpenStates database setup complete!${NC}" | |
| echo "======================================" | |
| echo "" | |
| echo "Database: $DB_NAME" | |
| echo "Host: $DB_HOST:$DB_PORT (PostgreSQL 17)" | |
| echo "" | |
| echo "Connect with:" | |
| echo " PGPASSWORD=$DB_PASSWORD psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME" | |
| echo "" | |
| echo "Or from Python:" | |
| echo " import psycopg2" | |
| echo " conn = psycopg2.connect(host='$DB_HOST', port=$DB_PORT, database='$DB_NAME', user='$DB_USER', password='$DB_PASSWORD')" | |
| echo "" | |