Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
File size: 10,185 Bytes
61d29fc | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 | -- ============================================================================
-- NEON DATABASE SCHEMA FOR OPEN NAVIGATOR
-- Optimized for fast API queries on HuggingFace Spaces deployment
-- ============================================================================
-- Drop existing tables if rerunning (careful in production!)
DROP TABLE IF EXISTS stats_aggregates CASCADE;
DROP TABLE IF EXISTS nonprofits_search CASCADE;
DROP TABLE IF EXISTS jurisdictions_search CASCADE;
DROP TABLE IF EXISTS contacts_search CASCADE;
DROP TABLE IF EXISTS events_search CASCADE;
DROP TABLE IF EXISTS reference_causes CASCADE;
DROP TABLE IF EXISTS reference_ntee_codes CASCADE;
DROP TABLE IF EXISTS last_sync CASCADE;
-- ============================================================================
-- AGGREGATE STATISTICS TABLES
-- Pre-computed stats for fast dashboard loading
-- ============================================================================
CREATE TABLE stats_aggregates (
id SERIAL PRIMARY KEY,
level VARCHAR(20) NOT NULL, -- 'national', 'state', 'county', 'city'
state VARCHAR(2), -- Two-letter state code (e.g., 'MA')
county VARCHAR(100), -- County name
city VARCHAR(100), -- City name
-- Core metrics
jurisdictions_count INTEGER DEFAULT 0,
school_districts_count INTEGER DEFAULT 0,
nonprofits_count INTEGER DEFAULT 0,
events_count INTEGER DEFAULT 0,
bills_count INTEGER DEFAULT 0,
contacts_count INTEGER DEFAULT 0,
-- Financial aggregates (from nonprofit data)
total_revenue BIGINT DEFAULT 0,
total_assets BIGINT DEFAULT 0,
-- Metadata
last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-- Unique constraint on geographic level
UNIQUE (level, state, county, city)
);
-- Indexes for fast lookups by geography
CREATE INDEX idx_stats_level ON stats_aggregates(level);
CREATE INDEX idx_stats_state ON stats_aggregates(state) WHERE state IS NOT NULL;
CREATE INDEX idx_stats_state_county ON stats_aggregates(state, county) WHERE county IS NOT NULL;
CREATE INDEX idx_stats_state_city ON stats_aggregates(state, city) WHERE city IS NOT NULL;
-- ============================================================================
-- SEARCH-OPTIMIZED TABLES
-- Denormalized for fast full-text search
-- ============================================================================
-- Nonprofits search table (most frequently searched)
CREATE TABLE nonprofits_search (
ein VARCHAR(20) PRIMARY KEY,
name TEXT NOT NULL,
street_address TEXT,
city VARCHAR(100),
state VARCHAR(2),
zip_code VARCHAR(10),
county VARCHAR(100),
-- Classification
ntee_code VARCHAR(10),
ntee_description TEXT,
subsection_code VARCHAR(10),
affiliation_code VARCHAR(10),
classification_code VARCHAR(20),
-- Financial (most recent year)
revenue BIGINT,
assets BIGINT,
income BIGINT,
-- Status
ruling_date DATE,
foundation_code VARCHAR(10),
pf_filing_requirement_code VARCHAR(10),
accounting_period VARCHAR(10),
asset_code VARCHAR(10),
income_code VARCHAR(10),
filing_requirement_code VARCHAR(10),
exempt_organization_status_code VARCHAR(10),
tax_period VARCHAR(10),
asset_amount BIGINT,
income_amount BIGINT,
form_990_revenue_amount BIGINT,
-- Metadata
source VARCHAR(50) DEFAULT 'irs_bmf',
last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Full-text search indexes
CREATE INDEX idx_nonprofits_name_search ON nonprofits_search USING GIN (to_tsvector('english', name));
CREATE INDEX idx_nonprofits_state ON nonprofits_search(state);
CREATE INDEX idx_nonprofits_city_state ON nonprofits_search(city, state);
CREATE INDEX idx_nonprofits_county ON nonprofits_search(county);
CREATE INDEX idx_nonprofits_ntee ON nonprofits_search(ntee_code);
CREATE INDEX idx_nonprofits_zip ON nonprofits_search(zip_code);
-- Jurisdictions search table (cities, counties, townships)
CREATE TABLE jurisdictions_search (
id SERIAL PRIMARY KEY,
name TEXT NOT NULL,
type VARCHAR(50) NOT NULL, -- 'city', 'county', 'township', 'school_district'
state VARCHAR(2) NOT NULL,
county VARCHAR(100),
-- Geographic identifiers
geoid VARCHAR(20),
fips_code VARCHAR(20),
-- Population/size
population INTEGER,
area_sq_miles DECIMAL(12, 2),
-- Metadata
source VARCHAR(50) DEFAULT 'census',
last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-- Unique constraint
UNIQUE (name, type, state, county)
);
CREATE INDEX idx_jurisdictions_name_search ON jurisdictions_search USING GIN (to_tsvector('english', name));
CREATE INDEX idx_jurisdictions_state ON jurisdictions_search(state);
CREATE INDEX idx_jurisdictions_type ON jurisdictions_search(type);
CREATE INDEX idx_jurisdictions_state_type ON jurisdictions_search(state, type);
-- Contacts search table (nonprofit officers, legislators, etc.)
CREATE TABLE contacts_search (
id SERIAL PRIMARY KEY,
name TEXT NOT NULL,
title VARCHAR(200),
organization_name TEXT,
organization_ein VARCHAR(20),
-- Contact info
email VARCHAR(255),
phone VARCHAR(50),
-- Address
street_address TEXT,
city VARCHAR(100),
state VARCHAR(2),
zip_code VARCHAR(10),
-- Role/classification
role_type VARCHAR(50), -- 'officer', 'legislator', 'board_member', etc.
compensation BIGINT,
hours_per_week DECIMAL(5, 1),
-- Metadata
source VARCHAR(50) DEFAULT 'irs_990',
tax_year INTEGER,
last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX idx_contacts_name_search ON contacts_search USING GIN (to_tsvector('english', name));
CREATE INDEX idx_contacts_org_name_search ON contacts_search USING GIN (to_tsvector('english', organization_name));
CREATE INDEX idx_contacts_state ON contacts_search(state);
CREATE INDEX idx_contacts_ein ON contacts_search(organization_ein);
CREATE INDEX idx_contacts_role ON contacts_search(role_type);
-- Events search table (meetings, hearings, events)
CREATE TABLE events_search (
id SERIAL PRIMARY KEY,
title TEXT NOT NULL,
description TEXT,
event_date DATE,
event_time TIME,
-- Organization
jurisdiction_name VARCHAR(200),
jurisdiction_type VARCHAR(50),
state VARCHAR(2),
city VARCHAR(100),
-- Meeting details
location TEXT,
meeting_type VARCHAR(100),
status VARCHAR(50),
-- Documents/links
agenda_url TEXT,
minutes_url TEXT,
video_url TEXT,
-- Metadata
source VARCHAR(50) DEFAULT 'legistar',
last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX idx_events_title_search ON events_search USING GIN (to_tsvector('english', title));
CREATE INDEX idx_events_date ON events_search(event_date DESC);
CREATE INDEX idx_events_state ON events_search(state);
CREATE INDEX idx_events_jurisdiction ON events_search(jurisdiction_name);
CREATE INDEX idx_events_date_state ON events_search(event_date, state);
-- ============================================================================
-- REFERENCE DATA TABLES
-- Lookup tables for causes, NTEE codes, etc.
-- ============================================================================
CREATE TABLE reference_causes (
id SERIAL PRIMARY KEY,
cause_slug VARCHAR(100) UNIQUE NOT NULL,
cause_name TEXT NOT NULL,
description TEXT,
parent_category VARCHAR(100),
-- Metadata
source VARCHAR(50) DEFAULT 'everyorg',
last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX idx_causes_slug ON reference_causes(cause_slug);
CREATE INDEX idx_causes_name_search ON reference_causes USING GIN (to_tsvector('english', cause_name));
CREATE TABLE reference_ntee_codes (
code VARCHAR(10) PRIMARY KEY,
description TEXT NOT NULL,
category VARCHAR(50),
subcategory VARCHAR(100),
-- Metadata
source VARCHAR(50) DEFAULT 'irs',
last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX idx_ntee_description_search ON reference_ntee_codes USING GIN (to_tsvector('english', description));
-- ============================================================================
-- SYNC TRACKING TABLE
-- Track when data was last synced from parquet files
-- ============================================================================
CREATE TABLE last_sync (
table_name VARCHAR(100) PRIMARY KEY,
last_sync_time TIMESTAMP NOT NULL,
records_synced INTEGER DEFAULT 0,
sync_status VARCHAR(50) DEFAULT 'success',
error_message TEXT
);
-- ============================================================================
-- HELPER VIEWS FOR COMMON QUERIES
-- ============================================================================
-- Active nonprofits with basic info
CREATE VIEW nonprofits_active AS
SELECT
ein,
name,
city,
state,
ntee_code,
ntee_description,
revenue,
assets
FROM nonprofits_search
WHERE exempt_organization_status_code IS NULL
OR exempt_organization_status_code NOT IN ('T', 'X') -- Exclude terminated
ORDER BY revenue DESC NULLS LAST;
-- Recent events (last 30 days + upcoming)
CREATE VIEW events_recent AS
SELECT
id,
title,
description,
event_date,
jurisdiction_name,
state,
city,
status,
agenda_url
FROM events_search
WHERE event_date >= CURRENT_DATE - INTERVAL '30 days'
ORDER BY event_date DESC;
-- Top nonprofits by state
CREATE VIEW nonprofits_top_by_state AS
SELECT
state,
ein,
name,
city,
revenue,
ROW_NUMBER() OVER (PARTITION BY state ORDER BY revenue DESC NULLS LAST) as rank
FROM nonprofits_search
WHERE revenue IS NOT NULL
ORDER BY state, rank;
-- ============================================================================
-- GRANT PERMISSIONS (adjust as needed for your user)
-- ============================================================================
-- Grant SELECT to your application user if needed
-- GRANT SELECT ON ALL TABLES IN SCHEMA public TO your_app_user;
-- GRANT SELECT ON ALL SEQUENCES IN SCHEMA public TO your_app_user;
|