""" Grants.gov API Integration Fetch federal grant opportunities and match them to nonprofits in our database. API Documentation: https://www.grants.gov/api Endpoints: - search2: Search for grant opportunities - fetchOpportunity: Get detailed opportunity information Key Features: - No API key required for search2 and fetchOpportunity - Search by keyword, funding category, agency, status - Filter by Assistance Listing Number (ALN) Use Cases: 1. Alert nonprofits about relevant grant opportunities 2. Track oral health funding trends 3. Match available grants to eligible organizations 4. Monitor policy changes through grant announcements """ import requests import pandas as pd from typing import Dict, List, Optional from datetime import datetime from pathlib import Path from loguru import logger import time class GrantsGovAPI: """Client for Grants.gov RESTful API""" BASE_URL = "https://api.grants.gov/v1/api" STAGING_URL = "https://api.staging.grants.gov/v1/api" def __init__(self, use_staging: bool = False): """ Initialize Grants.gov API client Args: use_staging: Use staging environment for testing """ self.base_url = self.STAGING_URL if use_staging else self.BASE_URL self.session = requests.Session() self.session.headers.update({ 'Content-Type': 'application/json', 'User-Agent': 'CommunityOne/1.0 (Civic Engagement Platform)' }) def search_opportunities( self, keyword: Optional[str] = None, funding_categories: Optional[str] = None, agencies: Optional[str] = None, opp_statuses: str = "forecasted|posted", eligibilities: Optional[str] = None, aln: Optional[str] = None, rows: int = 100, start_record: int = 0 ) -> Dict: """ Search for grant opportunities Args: keyword: Search keyword (e.g., "oral health", "dental") funding_categories: Funding category codes (e.g., "HL" for Health) agencies: Agency codes (e.g., "HHS", "HHS-NIH") opp_statuses: Pipe-separated statuses (forecasted|posted|closed|archived) eligibilities: Eligibility codes (pipe-separated) aln: Assistance Listing Number (formerly CFDA) rows: Number of results to return (default 100) start_record: Starting record for pagination Returns: API response with search results Example: >>> api = GrantsGovAPI() >>> results = api.search_opportunities( ... keyword="oral health", ... funding_categories="HL", ... agencies="HHS", ... opp_statuses="forecasted|posted" ... ) """ url = f"{self.base_url}/search2" payload = { "rows": rows, "startRecordNum": start_record, "oppStatuses": opp_statuses } # Add optional parameters if keyword: payload["keyword"] = keyword if funding_categories: payload["fundingCategories"] = funding_categories if agencies: payload["agencies"] = agencies if eligibilities: payload["eligibilities"] = eligibilities if aln: payload["aln"] = aln logger.info(f"Searching Grants.gov: {payload}") response = self.session.post(url, json=payload) response.raise_for_status() data = response.json() if data.get("errorcode") != 0: logger.error(f"API error: {data.get('msg')}") return data hit_count = data.get("data", {}).get("hitCount", 0) logger.info(f"Found {hit_count:,} opportunities") return data def fetch_opportunity(self, opportunity_id: int) -> Dict: """ Get detailed information about a specific grant opportunity Args: opportunity_id: Opportunity ID from search results Returns: Detailed opportunity information """ url = f"{self.base_url}/fetchOpportunity" payload = {"opportunityId": opportunity_id} response = self.session.post(url, json=payload) response.raise_for_status() data = response.json() if data.get("errorcode") != 0: logger.error(f"API error: {data.get('msg')}") return data def search_to_dataframe( self, keyword: Optional[str] = None, funding_categories: Optional[str] = None, agencies: Optional[str] = None, opp_statuses: str = "forecasted|posted", max_results: int = 1000 ) -> pd.DataFrame: """ Search for opportunities and return as DataFrame Args: keyword: Search keyword funding_categories: Funding category codes agencies: Agency codes opp_statuses: Opportunity statuses max_results: Maximum number of results to fetch Returns: DataFrame with opportunity information """ all_opportunities = [] start_record = 0 rows_per_request = 100 while len(all_opportunities) < max_results: results = self.search_opportunities( keyword=keyword, funding_categories=funding_categories, agencies=agencies, opp_statuses=opp_statuses, rows=rows_per_request, start_record=start_record ) if results.get("errorcode") != 0: break data = results.get("data", {}) hits = data.get("oppHits", []) if not hits: break all_opportunities.extend(hits) # Check if we've fetched all available results hit_count = data.get("hitCount", 0) if len(all_opportunities) >= hit_count: break start_record += rows_per_request time.sleep(0.5) # Rate limiting # Convert to DataFrame if all_opportunities: df = pd.DataFrame(all_opportunities) logger.info(f"Fetched {len(df):,} opportunities") return df else: logger.warning("No opportunities found") return pd.DataFrame() class GrantMatcher: """Match grant opportunities to nonprofits""" def __init__(self, grants_api: GrantsGovAPI): self.api = grants_api def find_oral_health_grants( self, opp_statuses: str = "forecasted|posted" ) -> pd.DataFrame: """ Find all oral health related grant opportunities Returns: DataFrame with oral health grants """ keywords = [ "oral health", "dental", "fluoridation", "tooth decay", "dental care", "dental hygiene", "dentistry" ] all_grants = [] for keyword in keywords: logger.info(f"Searching for: {keyword}") df = self.api.search_to_dataframe( keyword=keyword, funding_categories="HL", # Health category opp_statuses=opp_statuses, max_results=500 ) if not df.empty: df['search_keyword'] = keyword all_grants.append(df) time.sleep(1) # Rate limiting if all_grants: combined = pd.concat(all_grants, ignore_index=True) # Remove duplicates by opportunity ID combined = combined.drop_duplicates(subset=['id']) logger.info(f"Found {len(combined):,} unique oral health grants") return combined else: return pd.DataFrame() def match_grants_to_state( self, state_code: str, grants_df: pd.DataFrame, nonprofits_df: pd.DataFrame ) -> pd.DataFrame: """ Match grants to nonprofits in a specific state Args: state_code: Two-letter state code (e.g., "MA") grants_df: DataFrame with grant opportunities nonprofits_df: DataFrame with nonprofit organizations Returns: DataFrame with grant matches """ # Filter nonprofits to state state_nonprofits = nonprofits_df[nonprofits_df['STATE'] == state_code].copy() logger.info(f"Matching {len(grants_df):,} grants to {len(state_nonprofits):,} nonprofits in {state_code}") # Create cross-join of all grants with all nonprofits # (In practice, you'd filter by eligibility criteria, NTEE codes, etc.) matches = [] for _, grant in grants_df.iterrows(): match_record = { 'state': state_code, 'opportunity_id': grant.get('id'), 'opportunity_number': grant.get('opportunityNumber'), 'opportunity_title': grant.get('opportunityTitle'), 'agency': grant.get('agencyCode'), 'agency_name': grant.get('agencyName'), 'posted_date': grant.get('openDate'), 'close_date': grant.get('closeDate'), 'status': grant.get('opportunityStatus'), 'aln': grant.get('cfdaList', [{}])[0].get('cfdaNumber') if grant.get('cfdaList') else None, 'eligible_nonprofit_count': len(state_nonprofits), # Placeholder - need to check actual eligibility 'last_updated': datetime.now().isoformat() } matches.append(match_record) return pd.DataFrame(matches) def save_opportunities_by_state( self, grants_df: pd.DataFrame, output_dir: Path = Path("data/gold/grants") ): """ Save grant opportunities organized by state Args: grants_df: DataFrame with grant opportunities output_dir: Output directory for grant data """ output_dir.mkdir(parents=True, exist_ok=True) # Save all opportunities all_path = output_dir / "federal_grant_opportunities.parquet" grants_df.to_parquet(all_path, index=False) logger.info(f"Saved {len(grants_df):,} opportunities to {all_path}") # Also save by agency for easier filtering if 'agencyCode' in grants_df.columns: for agency in grants_df['agencyCode'].dropna().unique(): agency_df = grants_df[grants_df['agencyCode'] == agency] agency_path = output_dir / f"opportunities_{agency.replace('-', '_')}.parquet" agency_df.to_parquet(agency_path, index=False) logger.info(f"Saved {len(agency_df):,} {agency} opportunities to {agency_path}") def main(): """Example usage""" import argparse parser = argparse.ArgumentParser(description="Fetch federal grant opportunities from Grants.gov") parser.add_argument("--keyword", help="Search keyword") parser.add_argument("--funding-category", help="Funding category code (e.g., HL for Health)") parser.add_argument("--agency", help="Agency code (e.g., HHS)") parser.add_argument("--oral-health", action="store_true", help="Search for oral health grants") parser.add_argument("--output", type=Path, default=Path("data/gold/grants"), help="Output directory") parser.add_argument("--staging", action="store_true", help="Use staging environment") args = parser.parse_args() # Initialize API client api = GrantsGovAPI(use_staging=args.staging) matcher = GrantMatcher(api) if args.oral_health: # Find all oral health grants grants_df = matcher.find_oral_health_grants() if not grants_df.empty: # Save results matcher.save_opportunities_by_state(grants_df, args.output) # Display summary print(f"\n{'='*60}") print(f"Found {len(grants_df):,} oral health grant opportunities") print(f"{'='*60}\n") if 'agencyCode' in grants_df.columns: print("By Agency:") print(grants_df['agencyCode'].value_counts()) if 'opportunityStatus' in grants_df.columns: print("\nBy Status:") print(grants_df['opportunityStatus'].value_counts()) else: # Custom search results_df = api.search_to_dataframe( keyword=args.keyword, funding_categories=args.funding_category, agencies=args.agency, max_results=1000 ) if not results_df.empty: output_file = args.output / "grant_opportunities.parquet" args.output.mkdir(parents=True, exist_ok=True) results_df.to_parquet(output_file, index=False) print(f"Saved {len(results_df):,} opportunities to {output_file}") if __name__ == "__main__": main()