File size: 10,185 Bytes
61d29fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
-- ============================================================================
-- NEON DATABASE SCHEMA FOR OPEN NAVIGATOR
-- Optimized for fast API queries on HuggingFace Spaces deployment
-- ============================================================================

-- Drop existing tables if rerunning (careful in production!)
DROP TABLE IF EXISTS stats_aggregates CASCADE;
DROP TABLE IF EXISTS nonprofits_search CASCADE;
DROP TABLE IF EXISTS jurisdictions_search CASCADE;
DROP TABLE IF EXISTS contacts_search CASCADE;
DROP TABLE IF EXISTS events_search CASCADE;
DROP TABLE IF EXISTS reference_causes CASCADE;
DROP TABLE IF EXISTS reference_ntee_codes CASCADE;
DROP TABLE IF EXISTS last_sync CASCADE;

-- ============================================================================
-- AGGREGATE STATISTICS TABLES
-- Pre-computed stats for fast dashboard loading
-- ============================================================================

CREATE TABLE stats_aggregates (
    id SERIAL PRIMARY KEY,
    level VARCHAR(20) NOT NULL,  -- 'national', 'state', 'county', 'city'
    state VARCHAR(2),             -- Two-letter state code (e.g., 'MA')
    county VARCHAR(100),          -- County name
    city VARCHAR(100),            -- City name
    
    -- Core metrics
    jurisdictions_count INTEGER DEFAULT 0,
    school_districts_count INTEGER DEFAULT 0,
    nonprofits_count INTEGER DEFAULT 0,
    events_count INTEGER DEFAULT 0,
    bills_count INTEGER DEFAULT 0,
    contacts_count INTEGER DEFAULT 0,
    
    -- Financial aggregates (from nonprofit data)
    total_revenue BIGINT DEFAULT 0,
    total_assets BIGINT DEFAULT 0,
    
    -- Metadata
    last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    
    -- Unique constraint on geographic level
    UNIQUE (level, state, county, city)
);

-- Indexes for fast lookups by geography
CREATE INDEX idx_stats_level ON stats_aggregates(level);
CREATE INDEX idx_stats_state ON stats_aggregates(state) WHERE state IS NOT NULL;
CREATE INDEX idx_stats_state_county ON stats_aggregates(state, county) WHERE county IS NOT NULL;
CREATE INDEX idx_stats_state_city ON stats_aggregates(state, city) WHERE city IS NOT NULL;


-- ============================================================================
-- SEARCH-OPTIMIZED TABLES
-- Denormalized for fast full-text search
-- ============================================================================

-- Nonprofits search table (most frequently searched)
CREATE TABLE nonprofits_search (
    ein VARCHAR(20) PRIMARY KEY,
    name TEXT NOT NULL,
    street_address TEXT,
    city VARCHAR(100),
    state VARCHAR(2),
    zip_code VARCHAR(10),
    county VARCHAR(100),
    
    -- Classification
    ntee_code VARCHAR(10),
    ntee_description TEXT,
    subsection_code VARCHAR(10),
    affiliation_code VARCHAR(10),
    classification_code VARCHAR(20),
    
    -- Financial (most recent year)
    revenue BIGINT,
    assets BIGINT,
    income BIGINT,
    
    -- Status
    ruling_date DATE,
    foundation_code VARCHAR(10),
    pf_filing_requirement_code VARCHAR(10),
    accounting_period VARCHAR(10),
    asset_code VARCHAR(10),
    income_code VARCHAR(10),
    filing_requirement_code VARCHAR(10),
    exempt_organization_status_code VARCHAR(10),
    tax_period VARCHAR(10),
    asset_amount BIGINT,
    income_amount BIGINT,
    form_990_revenue_amount BIGINT,
    
    -- Metadata
    source VARCHAR(50) DEFAULT 'irs_bmf',
    last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Full-text search indexes
CREATE INDEX idx_nonprofits_name_search ON nonprofits_search USING GIN (to_tsvector('english', name));
CREATE INDEX idx_nonprofits_state ON nonprofits_search(state);
CREATE INDEX idx_nonprofits_city_state ON nonprofits_search(city, state);
CREATE INDEX idx_nonprofits_county ON nonprofits_search(county);
CREATE INDEX idx_nonprofits_ntee ON nonprofits_search(ntee_code);
CREATE INDEX idx_nonprofits_zip ON nonprofits_search(zip_code);


-- Jurisdictions search table (cities, counties, townships)
CREATE TABLE jurisdictions_search (
    id SERIAL PRIMARY KEY,
    name TEXT NOT NULL,
    type VARCHAR(50) NOT NULL,  -- 'city', 'county', 'township', 'school_district'
    state VARCHAR(2) NOT NULL,
    county VARCHAR(100),
    
    -- Geographic identifiers
    geoid VARCHAR(20),
    fips_code VARCHAR(20),
    
    -- Population/size
    population INTEGER,
    area_sq_miles DECIMAL(12, 2),
    
    -- Metadata
    source VARCHAR(50) DEFAULT 'census',
    last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    
    -- Unique constraint
    UNIQUE (name, type, state, county)
);

CREATE INDEX idx_jurisdictions_name_search ON jurisdictions_search USING GIN (to_tsvector('english', name));
CREATE INDEX idx_jurisdictions_state ON jurisdictions_search(state);
CREATE INDEX idx_jurisdictions_type ON jurisdictions_search(type);
CREATE INDEX idx_jurisdictions_state_type ON jurisdictions_search(state, type);


-- Contacts search table (nonprofit officers, legislators, etc.)
CREATE TABLE contacts_search (
    id SERIAL PRIMARY KEY,
    name TEXT NOT NULL,
    title VARCHAR(200),
    organization_name TEXT,
    organization_ein VARCHAR(20),
    
    -- Contact info
    email VARCHAR(255),
    phone VARCHAR(50),
    
    -- Address
    street_address TEXT,
    city VARCHAR(100),
    state VARCHAR(2),
    zip_code VARCHAR(10),
    
    -- Role/classification
    role_type VARCHAR(50),  -- 'officer', 'legislator', 'board_member', etc.
    compensation BIGINT,
    hours_per_week DECIMAL(5, 1),
    
    -- Metadata
    source VARCHAR(50) DEFAULT 'irs_990',
    tax_year INTEGER,
    last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX idx_contacts_name_search ON contacts_search USING GIN (to_tsvector('english', name));
CREATE INDEX idx_contacts_org_name_search ON contacts_search USING GIN (to_tsvector('english', organization_name));
CREATE INDEX idx_contacts_state ON contacts_search(state);
CREATE INDEX idx_contacts_ein ON contacts_search(organization_ein);
CREATE INDEX idx_contacts_role ON contacts_search(role_type);


-- Events search table (meetings, hearings, events)
CREATE TABLE events_search (
    id SERIAL PRIMARY KEY,
    title TEXT NOT NULL,
    description TEXT,
    event_date DATE,
    event_time TIME,
    
    -- Organization
    jurisdiction_name VARCHAR(200),
    jurisdiction_type VARCHAR(50),
    state VARCHAR(2),
    city VARCHAR(100),
    
    -- Meeting details
    location TEXT,
    meeting_type VARCHAR(100),
    status VARCHAR(50),
    
    -- Documents/links
    agenda_url TEXT,
    minutes_url TEXT,
    video_url TEXT,
    
    -- Metadata
    source VARCHAR(50) DEFAULT 'legistar',
    last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX idx_events_title_search ON events_search USING GIN (to_tsvector('english', title));
CREATE INDEX idx_events_date ON events_search(event_date DESC);
CREATE INDEX idx_events_state ON events_search(state);
CREATE INDEX idx_events_jurisdiction ON events_search(jurisdiction_name);
CREATE INDEX idx_events_date_state ON events_search(event_date, state);


-- ============================================================================
-- REFERENCE DATA TABLES
-- Lookup tables for causes, NTEE codes, etc.
-- ============================================================================

CREATE TABLE reference_causes (
    id SERIAL PRIMARY KEY,
    cause_slug VARCHAR(100) UNIQUE NOT NULL,
    cause_name TEXT NOT NULL,
    description TEXT,
    parent_category VARCHAR(100),
    
    -- Metadata
    source VARCHAR(50) DEFAULT 'everyorg',
    last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX idx_causes_slug ON reference_causes(cause_slug);
CREATE INDEX idx_causes_name_search ON reference_causes USING GIN (to_tsvector('english', cause_name));


CREATE TABLE reference_ntee_codes (
    code VARCHAR(10) PRIMARY KEY,
    description TEXT NOT NULL,
    category VARCHAR(50),
    subcategory VARCHAR(100),
    
    -- Metadata
    source VARCHAR(50) DEFAULT 'irs',
    last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX idx_ntee_description_search ON reference_ntee_codes USING GIN (to_tsvector('english', description));


-- ============================================================================
-- SYNC TRACKING TABLE
-- Track when data was last synced from parquet files
-- ============================================================================

CREATE TABLE last_sync (
    table_name VARCHAR(100) PRIMARY KEY,
    last_sync_time TIMESTAMP NOT NULL,
    records_synced INTEGER DEFAULT 0,
    sync_status VARCHAR(50) DEFAULT 'success',
    error_message TEXT
);


-- ============================================================================
-- HELPER VIEWS FOR COMMON QUERIES
-- ============================================================================

-- Active nonprofits with basic info
CREATE VIEW nonprofits_active AS
SELECT 
    ein,
    name,
    city,
    state,
    ntee_code,
    ntee_description,
    revenue,
    assets
FROM nonprofits_search
WHERE exempt_organization_status_code IS NULL 
   OR exempt_organization_status_code NOT IN ('T', 'X')  -- Exclude terminated
ORDER BY revenue DESC NULLS LAST;


-- Recent events (last 30 days + upcoming)
CREATE VIEW events_recent AS
SELECT 
    id,
    title,
    description,
    event_date,
    jurisdiction_name,
    state,
    city,
    status,
    agenda_url
FROM events_search
WHERE event_date >= CURRENT_DATE - INTERVAL '30 days'
ORDER BY event_date DESC;


-- Top nonprofits by state
CREATE VIEW nonprofits_top_by_state AS
SELECT 
    state,
    ein,
    name,
    city,
    revenue,
    ROW_NUMBER() OVER (PARTITION BY state ORDER BY revenue DESC NULLS LAST) as rank
FROM nonprofits_search
WHERE revenue IS NOT NULL
ORDER BY state, rank;


-- ============================================================================
-- GRANT PERMISSIONS (adjust as needed for your user)
-- ============================================================================

-- Grant SELECT to your application user if needed
-- GRANT SELECT ON ALL TABLES IN SCHEMA public TO your_app_user;
-- GRANT SELECT ON ALL SEQUENCES IN SCHEMA public TO your_app_user;