# File size: 19,424 Bytes
# 61d29fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
"""
FEC (Federal Election Commission) Data Integration

Track political contributions and their relationship to:
- Nonprofit leadership (board members, executives)
- Policy decisions and grant awards
- Oral health advocacy funding

Data Sources:
1. FEC Bulk Data: Individual contributions, committee finances
2. FEC API: Real-time contribution tracking
3. OpenFEC: RESTful API for contribution searches

Use Cases:
- Map donor networks in oral health advocacy
- Track political influence on grant awards
- Identify politically active nonprofit leaders
- Analyze campaign finance in healthcare policy

API Documentation: https://api.open.fec.gov/developers/
Bulk Data: https://www.fec.gov/data/browse-data/?tab=bulk-data
"""

import requests
import pandas as pd
from typing import Dict, List, Optional, Tuple
from datetime import datetime
from pathlib import Path
from loguru import logger
import time
import zipfile
import io


class OpenFECAPI:
    """Client for OpenFEC API (easier than parsing bulk files)

    Every public method issues a single GET against ``BASE_URL`` through a
    shared ``requests.Session`` and returns the decoded JSON payload.
    """

    BASE_URL = "https://api.open.fec.gov/v1"

    # Pause applied after every successful request (0.2s ~= 5 requests/second).
    REQUEST_DELAY_SECONDS = 0.2
    # Upper bound on how long a single request may block; without a timeout
    # `requests` waits indefinitely on a stalled connection.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize OpenFEC API client

        Args:
            api_key: FEC API key (get from https://api.data.gov/signup/)
                    If None, uses 'DEMO_KEY' with lower rate limits

        Note:
            Get your free API key at: https://api.data.gov/signup/
            DEMO_KEY has strict rate limits (30 requests/hour)
        """
        self.api_key = api_key or "DEMO_KEY"
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'CommunityOne/1.0 (Civic Engagement Platform)'
        })

    @staticmethod
    def _drop_unset(params: Dict) -> Dict:
        """Return a copy of ``params`` without ``None`` / empty-string values.

        Numeric zero is kept on purpose so that e.g. ``min_amount=0`` is
        actually sent to the API instead of being silently discarded.
        """
        return {
            key: value
            for key, value in params.items()
            if value is not None and value != ""
        }

    def _make_request(self, endpoint: str, params: Optional[Dict] = None) -> Dict:
        """
        Make API request with rate limiting

        Args:
            endpoint: Path below ``BASE_URL`` (e.g. ``'candidates/'``)
            params: Query parameters; the dict is copied, never modified

        Returns:
            Decoded JSON response body

        Raises:
            requests.HTTPError: If the server answers with an error status
        """
        # Work on a copy so the API key never leaks into the caller's dict.
        query = dict(params or {})
        query['api_key'] = self.api_key

        url = f"{self.BASE_URL}/{endpoint}"

        response = self.session.get(
            url,
            params=query,
            timeout=self.REQUEST_TIMEOUT_SECONDS
        )
        response.raise_for_status()

        # Rate limiting
        if self.REQUEST_DELAY_SECONDS > 0:
            time.sleep(self.REQUEST_DELAY_SECONDS)

        return response.json()

    def search_individual_contributions(
        self,
        contributor_name: Optional[str] = None,
        contributor_city: Optional[str] = None,
        contributor_state: Optional[str] = None,
        contributor_employer: Optional[str] = None,
        min_amount: Optional[float] = None,
        max_amount: Optional[float] = None,
        min_date: Optional[str] = None,
        max_date: Optional[str] = None,
        per_page: int = 100,
        page: int = 1
    ) -> Dict:
        """
        Search individual contributions

        Args:
            contributor_name: Contributor name (partial match)
            contributor_city: City
            contributor_state: Two-letter state code
            contributor_employer: Employer name (partial match)
            min_amount: Minimum contribution amount (0 is a valid bound)
            max_amount: Maximum contribution amount (0 is a valid bound)
            min_date: Start date (YYYY-MM-DD)
            max_date: End date (YYYY-MM-DD)
            per_page: Results per page (max 100)
            page: Page number

        Returns:
            API response with contribution records

        Note:
            NOTE(review): the OpenFEC docs describe schedule_a as using
            keyset pagination (``last_index``); confirm that ``page`` is
            honoured by this endpoint before relying on it.

        Example:
            >>> api = OpenFECAPI(api_key="your_key")
            >>> # Find contributions from nonprofit executives
            >>> results = api.search_individual_contributions(
            ...     contributor_employer="Community Health Center",
            ...     contributor_state="MA",
            ...     min_amount=1000
            ... )
        """
        params = self._drop_unset({
            'per_page': per_page,
            'page': page,
            'contributor_name': contributor_name,
            'contributor_city': contributor_city,
            'contributor_state': contributor_state,
            'contributor_employer': contributor_employer,
            'min_amount': min_amount,
            'max_amount': max_amount,
            'min_date': min_date,
            'max_date': max_date
        })

        # Logged before the request so the API key is never part of the message.
        logger.info(f"Searching FEC contributions: {params}")

        return self._make_request('schedules/schedule_a/', params)

    def get_candidate_info(self, candidate_id: str) -> Dict:
        """Get information about a specific candidate"""
        return self._make_request(f'candidate/{candidate_id}/')

    def search_candidates(
        self,
        name: Optional[str] = None,
        office: Optional[str] = None,  # 'H' (House), 'S' (Senate), 'P' (President)
        state: Optional[str] = None,
        district: Optional[str] = None,
        party: Optional[str] = None,  # 'DEM', 'REP', etc.
        cycle: Optional[int] = None,
        per_page: int = 100
    ) -> Dict:
        """
        Search for candidates

        Args:
            name: Candidate name (partial match)
            office: Office type (H, S, P)
            state: Two-letter state code
            district: Congressional district (for House)
            party: Party code (DEM, REP, etc.)
            cycle: Election cycle year
            per_page: Results per page

        Returns:
            API response with candidate records
        """
        params = self._drop_unset({
            'per_page': per_page,
            'name': name,
            'office': office,
            'state': state,
            'district': district,
            'party': party,
            'cycle': cycle
        })

        return self._make_request('candidates/', params)

    def search_committees(
        self,
        name: Optional[str] = None,
        committee_type: Optional[str] = None,
        designation: Optional[str] = None,
        state: Optional[str] = None,
        per_page: int = 100
    ) -> Dict:
        """
        Search for committees

        Args:
            name: Committee name (partial match)
            committee_type: Type (P=Presidential, H=House, S=Senate, etc.)
            designation: Designation code
            state: Two-letter state code
            per_page: Results per page

        Returns:
            API response with committee records
        """
        params = self._drop_unset({
            'per_page': per_page,
            'name': name,
            'committee_type': committee_type,
            'designation': designation,
            'state': state
        })

        return self._make_request('committees/', params)


class FECBulkDataLoader:
    """Load FEC bulk data files (for comprehensive historical analysis)

    Downloads are cached as ZIP archives under ``cache_dir`` and parsed in
    chunks with pandas.
    """

    BULK_DATA_URL = "https://www.fec.gov/files/bulk-downloads"

    # Column layout of the individual-contributions bulk file. The FEC ships
    # the data file without a header row (the header is published separately),
    # so the names have to be supplied when parsing.
    INDIV_COLUMNS = (
        'CMTE_ID', 'AMNDT_IND', 'RPT_TP', 'TRANSACTION_PGI', 'IMAGE_NUM',
        'TRANSACTION_TP', 'ENTITY_TP', 'NAME', 'CITY', 'STATE', 'ZIP_CODE',
        'EMPLOYER', 'OCCUPATION', 'TRANSACTION_DT', 'TRANSACTION_AMT',
        'OTHER_ID', 'TRAN_ID', 'FILE_NUM', 'MEMO_CD', 'MEMO_TEXT', 'SUB_ID'
    )

    def __init__(self, cache_dir: Path = Path("data/cache/fec")):
        """
        Args:
            cache_dir: Directory for downloaded archives (created if missing);
                       a plain string path is accepted as well
        """
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def download_individual_contributions(
        self,
        cycle: str = "2024",
        force: bool = False
    ) -> Path:
        """
        Download bulk individual contributions file

        Args:
            cycle: Election cycle (e.g., "2024", "2022"); ints are accepted
            force: Force re-download even if cached

        Returns:
            Path to downloaded file

        Raises:
            requests.HTTPError: If the server answers with an error status

        Note:
            These files are LARGE (several GB). Consider using the API
            for smaller queries or state-specific data.
        """
        cycle = str(cycle)
        filename = f"indiv{cycle[-2:]}.zip"  # e.g., indiv24.zip
        cache_file = self.cache_dir / filename

        if cache_file.exists() and not force:
            logger.info(f"Using cached file: {cache_file}")
            return cache_file

        url = f"{self.BULK_DATA_URL}/{cycle}/{filename}"

        logger.info(f"Downloading {url} (this may take a while...)")
        logger.warning("File size is typically 1-5 GB!")

        # Stream into a side file and rename only on success; otherwise an
        # interrupted download would later be picked up as a valid cache hit.
        partial_file = cache_file.with_name(cache_file.name + ".part")
        progress_step = 10 * 1024 * 1024  # Every 10MB

        try:
            # (connect, read) timeouts; the context manager releases the
            # connection even when the transfer fails midway.
            with requests.get(url, stream=True, timeout=(10, 60)) as response:
                response.raise_for_status()

                total_size = int(response.headers.get('content-length', 0))
                downloaded = 0
                next_report = progress_step

                with open(partial_file, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
                        downloaded += len(chunk)
                        # Threshold comparison instead of `% == 0`: chunks are
                        # not guaranteed to be exactly 8192 bytes long.
                        if total_size > 0 and downloaded >= next_report:
                            logger.info(f"Downloaded: {downloaded / (1024*1024):.1f} MB / {total_size / (1024*1024):.1f} MB")
                            next_report += progress_step

            partial_file.replace(cache_file)
        finally:
            # Only still present when the download did not complete.
            if partial_file.exists():
                partial_file.unlink()

        logger.info(f"Download complete: {cache_file}")
        return cache_file

    @staticmethod
    def _apply_filters(
        chunk: pd.DataFrame,
        state_filter: Optional[str],
        employer_filter: Optional[str],
        min_amount: Optional[float]
    ) -> pd.DataFrame:
        """Return the rows of ``chunk`` that satisfy every active filter."""
        # A filter is skipped when its column is absent from the chunk.
        if state_filter and 'STATE' in chunk.columns:
            chunk = chunk[chunk['STATE'] == state_filter]

        if employer_filter and 'EMPLOYER' in chunk.columns:
            # str.contains treats the pattern as a regular expression by default
            mask = chunk['EMPLOYER'].str.contains(
                employer_filter,
                case=False,
                na=False
            )
            chunk = chunk[mask]

        if min_amount is not None and 'TRANSACTION_AMT' in chunk.columns:
            amounts = pd.to_numeric(chunk['TRANSACTION_AMT'], errors='coerce')
            # assign() builds a new frame, avoiding writes into a filtered slice
            chunk = chunk.assign(TRANSACTION_AMT=amounts)
            chunk = chunk[chunk['TRANSACTION_AMT'] >= min_amount]

        return chunk

    def parse_individual_contributions(
        self,
        zip_path: Path,
        state_filter: Optional[str] = None,
        employer_filter: Optional[str] = None,
        min_amount: Optional[float] = None
    ) -> pd.DataFrame:
        """
        Parse individual contributions from bulk file

        Args:
            zip_path: Path to bulk ZIP file
            state_filter: Filter to specific state (e.g., "MA")
            employer_filter: Filter by employer name (case-insensitive partial
                             match; interpreted by pandas as a regex)
            min_amount: Minimum contribution amount; when given (including 0)
                        TRANSACTION_AMT is converted to numeric in the result

        Returns:
            DataFrame with contribution records (empty if nothing matched)

        Raises:
            ValueError: If the archive contains no .txt member

        Note:
            This can be memory-intensive for full files. Consider filters.
        """
        logger.info(f"Parsing {zip_path}")

        with zipfile.ZipFile(zip_path, 'r') as z:
            # Find the main data file (usually .txt)
            txt_files = [f for f in z.namelist() if f.endswith('.txt')]

            if not txt_files:
                raise ValueError(f"No .txt file found in {zip_path}")

            data_file = txt_files[0]
            logger.info(f"Reading {data_file}")

            # Peek at the first line: official bulk files carry no header row,
            # but a file that does start with the column names is still honoured.
            with z.open(data_file) as f:
                first_field = f.readline().decode('utf-8', errors='replace').split('|', 1)[0]

            if first_field.strip().upper() == self.INDIV_COLUMNS[0]:
                header_options = {}
            else:
                header_options = {
                    'header': None,
                    'names': list(self.INDIV_COLUMNS),
                    'index_col': False
                }

            # FEC bulk files are pipe-delimited
            with z.open(data_file) as f:
                # Read in chunks to handle large files
                chunks = []

                for chunk in pd.read_csv(
                    f,
                    delimiter='|',
                    dtype=str,  # Read as strings first
                    chunksize=100000,
                    low_memory=False,
                    **header_options
                ):
                    # Apply filters during read to reduce memory
                    chunk = self._apply_filters(
                        chunk,
                        state_filter,
                        employer_filter,
                        min_amount
                    )

                    if len(chunk) > 0:
                        chunks.append(chunk)

        if not chunks:
            logger.warning("No records matched filters")
            return pd.DataFrame()

        df = pd.concat(chunks, ignore_index=True)
        logger.info(f"Parsed {len(df):,} records")
        return df


class PoliticalContributionMatcher:
    """Match FEC contributions to nonprofit leadership"""

    def __init__(self, fec_api: 'OpenFECAPI'):
        """
        Args:
            fec_api: Client used for all contribution searches
        """
        self.api = fec_api

    @staticmethod
    def _build_contribution_row(contrib: dict, officer_match: pd.Series) -> dict:
        """Combine one API contribution record with its officer's nonprofit data."""
        # The key may be present with a null value, so `.get(key, {})` is not enough.
        committee = contrib.get('committee') or {}
        return {
            'contributor_name': contrib.get('contributor_name'),
            'contributor_city': contrib.get('contributor_city'),
            'contributor_state': contrib.get('contributor_state'),
            'contributor_employer': contrib.get('contributor_employer'),
            'contribution_amount': contrib.get('contribution_receipt_amount'),
            'contribution_date': contrib.get('contribution_receipt_date'),
            'committee_name': committee.get('name'),
            'candidate_name': contrib.get('candidate_name'),
            # Nonprofit context
            'nonprofit_ein': officer_match.get('ein'),
            'nonprofit_name': officer_match.get('organization_name'),
            'officer_title': officer_match.get('title'),
            'officer_compensation': officer_match.get('compensation')
        }

    def find_nonprofit_leadership_contributions(
        self,
        officers_df: pd.DataFrame,
        state_code: str,
        min_amount: float = 200.0,
        election_cycle: str = "2024",
        max_names: Optional[int] = 100
    ) -> pd.DataFrame:
        """
        Find political contributions from nonprofit officers

        Args:
            officers_df: DataFrame with nonprofit officers (from IRS 990);
                         must contain a 'person_name' column
            state_code: State to search (e.g., "MA")
            min_amount: Minimum contribution to track
            election_cycle: Election cycle year
            max_names: Maximum number of distinct officer names to query
                       (default 100 because of API rate limits); None
                       removes the cap

        Returns:
            DataFrame matching officers to their political contributions
            (empty if 'person_name' is missing or nothing was found)

        Note:
            Only the first page of API results is used for each name, and
            nonprofit context comes from the first officer row with that name.
        """
        logger.info(f"Searching for political contributions from {len(officers_df):,} officers")

        if 'person_name' not in officers_df.columns:
            logger.warning("No 'person_name' column found")
            return pd.DataFrame()

        # Group by person name to avoid duplicates
        unique_names = officers_df['person_name'].dropna().unique()
        if max_names is not None:
            unique_names = unique_names[:max_names]

        all_contributions = []

        for position, name in enumerate(unique_names):
            # Rate limiting: pause between queries, including after a failed
            # one, so an API that is already rejecting requests is not hammered.
            if position:
                time.sleep(1)

            logger.info(f"Searching: {name}")

            try:
                # NOTE(review): min_date starts at Jan 1 of the cycle year;
                # confirm whether the preceding year should be included too.
                results = self.api.search_individual_contributions(
                    contributor_name=name,
                    contributor_state=state_code,
                    min_amount=min_amount,
                    min_date=f"{election_cycle}-01-01"
                )

                # Looked up once per name rather than once per contribution.
                officer_match = officers_df[
                    officers_df['person_name'] == name
                ].iloc[0]

                rows = [
                    self._build_contribution_row(contrib, officer_match)
                    for contrib in results.get('results') or []
                ]
            except Exception as e:
                # Best effort: one failing name must not abort the whole scan.
                logger.warning(f"Error searching {name}: {e}")
                continue

            all_contributions.extend(rows)

        if not all_contributions:
            return pd.DataFrame()

        df = pd.DataFrame(all_contributions)
        logger.info(f"Found {len(df):,} contributions from nonprofit leadership")
        return df

    def analyze_political_influence(
        self,
        contributions_df: pd.DataFrame,
        grants_df: pd.DataFrame
    ) -> pd.DataFrame:
        """
        Analyze potential political influence on grant awards

        Compare:
        - Which nonprofit leaders donated to campaigns
        - Which nonprofits received federal grants

        Args:
            contributions_df: Political contributions by nonprofit leaders
                (needs 'nonprofit_ein', 'contribution_amount',
                'contributor_name')
            grants_df: Federal grants received by nonprofits
                (needs 'ein', 'grant_amount')

        Returns:
            One row per nonprofit present in both inputs with columns
            'ein', 'total_political_donations', 'total_grants_received',
            'number_of_donors' (distinct contributor names). Empty DataFrame
            if either input is empty or no EIN appears in both.

        Raises:
            KeyError: If a non-empty input lacks one of the required columns

        Note:
            Timing between donation and grant is not part of the summary.
        """
        logger.info("Analyzing political influence patterns")

        if contributions_df.empty or grants_df.empty:
            logger.warning("No matches between contributions and grants")
            return pd.DataFrame()

        # Aggregate each side on its own BEFORE joining. Joining the raw rows
        # pairs every contribution with every grant of the same EIN, which
        # multiplies both totals by the size of the other side.
        donations = contributions_df.groupby('nonprofit_ein').agg(
            total_political_donations=('contribution_amount', 'sum'),
            number_of_donors=('contributor_name', 'nunique')
        ).reset_index()

        grants = grants_df.groupby('ein').agg(
            total_grants_received=('grant_amount', 'sum')
        ).reset_index()

        # Merge contributions with grants by EIN
        merged = donations.merge(
            grants,
            left_on='nonprofit_ein',
            right_on='ein',
            how='inner'
        )

        if merged.empty:
            logger.warning("No matches between contributions and grants")
            return pd.DataFrame()

        summary = merged[[
            'ein',
            'total_political_donations',
            'total_grants_received',
            'number_of_donors'
        ]].reset_index(drop=True)

        logger.info(f"Analyzed {len(summary):,} nonprofits with both donations and grants")

        return summary


def main():
    """Command-line entry point.

    Runs one individual-contribution search built from the CLI options,
    prints the record count and total amount, and writes the records to
    ``political_contributions.parquet`` inside the output directory.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Query FEC political contribution data")
    cli.add_argument("--api-key", help="FEC API key (get from https://api.data.gov/signup/)")
    cli.add_argument("--contributor", help="Contributor name to search")
    cli.add_argument("--employer", help="Employer name to search")
    cli.add_argument("--state", help="State code (e.g., MA)")
    cli.add_argument("--min-amount", type=float, default=200, help="Minimum contribution amount")
    cli.add_argument("--output", type=Path, default=Path("data/gold/fec"), help="Output directory")
    options = cli.parse_args()

    # Query the API once with whatever filters were supplied
    client = OpenFECAPI(api_key=options.api_key)
    response = client.search_individual_contributions(
        contributor_name=options.contributor,
        contributor_employer=options.employer,
        contributor_state=options.state,
        min_amount=options.min_amount
    )

    records = response.get('results')
    if not records:
        print("No contributions found")
        return

    frame = pd.DataFrame(records)

    print(f"\nFound {len(frame):,} contributions")
    total_amount = frame['contribution_receipt_amount'].sum()
    print(f"\nTotal amount: ${total_amount:,.2f}")

    # Persist the raw records
    destination_dir = options.output
    destination_dir.mkdir(parents=True, exist_ok=True)
    output_file = destination_dir / "political_contributions.parquet"
    frame.to_parquet(output_file, index=False)
    print(f"\nSaved to: {output_file}")


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()