# open-navigator/scripts/datasources/grants_gov/grants_gov_integration.py
# jcbowyer's picture
# Clean HuggingFace deployment without binary files
# 61d29fc
"""
Grants.gov API Integration
Fetch federal grant opportunities and match them to nonprofits in our database.
API Documentation: https://www.grants.gov/api
Endpoints:
- search2: Search for grant opportunities
- fetchOpportunity: Get detailed opportunity information
Key Features:
- No API key required for search2 and fetchOpportunity
- Search by keyword, funding category, agency, status
- Filter by Assistance Listing Number (ALN)
Use Cases:
1. Alert nonprofits about relevant grant opportunities
2. Track oral health funding trends
3. Match available grants to eligible organizations
4. Monitor policy changes through grant announcements
"""
import requests
import pandas as pd
from typing import Dict, List, Optional
from datetime import datetime
from pathlib import Path
from loguru import logger
import time
class GrantsGovAPI:
    """Client for the Grants.gov RESTful API.

    Wraps the ``search2`` and ``fetchOpportunity`` endpoints behind a shared
    ``requests.Session``. Every call is a JSON POST sent with a timeout so
    that a stalled connection cannot block the caller indefinitely.
    """

    BASE_URL = "https://api.grants.gov/v1/api"
    STAGING_URL = "https://api.staging.grants.gov/v1/api"

    # Seconds to wait for the server before a request is abandoned.
    DEFAULT_TIMEOUT = 30.0
    # Page size and pause (seconds) used when paginating search results.
    PAGE_SIZE = 100
    PAGE_DELAY = 0.5

    def __init__(self, use_staging: bool = False, timeout: float = DEFAULT_TIMEOUT):
        """
        Initialize Grants.gov API client

        Args:
            use_staging: Use staging environment for testing
            timeout: Seconds to wait for each HTTP response
        """
        self.base_url = self.STAGING_URL if use_staging else self.BASE_URL
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            'Content-Type': 'application/json',
            'User-Agent': 'CommunityOne/1.0 (Civic Engagement Platform)'
        })

    def _post(self, endpoint: str, payload: Dict) -> Dict:
        """POST ``payload`` as JSON to ``endpoint`` and return the decoded body.

        A non-zero ``errorcode`` in the body is logged but not raised; the
        body is returned so callers can inspect ``errorcode`` themselves.

        Raises:
            requests.HTTPError: when the HTTP status signals a failure.
            requests.Timeout: when no response arrives within ``self.timeout``.
        """
        url = f"{self.base_url}/{endpoint}"
        response = self.session.post(url, json=payload, timeout=self.timeout)
        response.raise_for_status()
        data = response.json()
        if data.get("errorcode") != 0:
            logger.error(f"API error: {data.get('msg')}")
        return data

    def search_opportunities(
        self,
        keyword: Optional[str] = None,
        funding_categories: Optional[str] = None,
        agencies: Optional[str] = None,
        opp_statuses: str = "forecasted|posted",
        eligibilities: Optional[str] = None,
        aln: Optional[str] = None,
        rows: int = 100,
        start_record: int = 0
    ) -> Dict:
        """
        Search for grant opportunities

        Args:
            keyword: Search keyword (e.g., "oral health", "dental")
            funding_categories: Funding category codes (e.g., "HL" for Health)
            agencies: Agency codes (e.g., "HHS", "HHS-NIH")
            opp_statuses: Pipe-separated statuses (forecasted|posted|closed|archived)
            eligibilities: Eligibility codes (pipe-separated)
            aln: Assistance Listing Number (formerly CFDA)
            rows: Number of results to return (default 100)
            start_record: Starting record for pagination

        Returns:
            API response with search results. The response is returned even
            when its ``errorcode`` is non-zero (the error is only logged).

        Example:
            >>> api = GrantsGovAPI()
            >>> results = api.search_opportunities(
            ...     keyword="oral health",
            ...     funding_categories="HL",
            ...     agencies="HHS",
            ...     opp_statuses="forecasted|posted"
            ... )
        """
        payload = {
            "rows": rows,
            "startRecordNum": start_record,
            "oppStatuses": opp_statuses
        }
        # Optional filters are sent only when the caller supplied a value
        optional_filters = {
            "keyword": keyword,
            "fundingCategories": funding_categories,
            "agencies": agencies,
            "eligibilities": eligibilities,
            "aln": aln,
        }
        payload.update({key: value for key, value in optional_filters.items() if value})

        logger.info(f"Searching Grants.gov: {payload}")
        data = self._post("search2", payload)
        if data.get("errorcode") != 0:
            return data

        # "data"/"hitCount" may be absent or null; treat both as zero hits
        hit_count = (data.get("data") or {}).get("hitCount") or 0
        logger.info(f"Found {hit_count:,} opportunities")
        return data

    def fetch_opportunity(self, opportunity_id: int) -> Dict:
        """
        Get detailed information about a specific grant opportunity

        Args:
            opportunity_id: Opportunity ID from search results

        Returns:
            Detailed opportunity information (returned as-is even when the
            response carries a non-zero ``errorcode``)
        """
        return self._post("fetchOpportunity", {"opportunityId": opportunity_id})

    def search_to_dataframe(
        self,
        keyword: Optional[str] = None,
        funding_categories: Optional[str] = None,
        agencies: Optional[str] = None,
        opp_statuses: str = "forecasted|posted",
        max_results: int = 1000,
        eligibilities: Optional[str] = None,
        aln: Optional[str] = None
    ) -> pd.DataFrame:
        """
        Search for opportunities and return as DataFrame

        Pages through ``search2`` until ``max_results`` rows are collected,
        the reported hit count is exhausted, a page comes back empty, or the
        API reports an error.

        Args:
            keyword: Search keyword
            funding_categories: Funding category codes
            agencies: Agency codes
            opp_statuses: Opportunity statuses
            max_results: Maximum number of results to fetch; the returned
                frame never has more rows than this
            eligibilities: Eligibility codes (pipe-separated)
            aln: Assistance Listing Number (formerly CFDA)

        Returns:
            DataFrame with opportunity information (empty when nothing matched)
        """
        all_opportunities: List[Dict] = []
        start_record = 0

        while len(all_opportunities) < max_results:
            remaining = max_results - len(all_opportunities)
            results = self.search_opportunities(
                keyword=keyword,
                funding_categories=funding_categories,
                agencies=agencies,
                opp_statuses=opp_statuses,
                eligibilities=eligibilities,
                aln=aln,
                # Never ask for more rows than the caller still wants
                rows=min(self.PAGE_SIZE, remaining),
                start_record=start_record
            )
            if results.get("errorcode") != 0:
                break

            data = results.get("data") or {}
            hits = data.get("oppHits") or []
            if not hits:
                break

            # Truncate in case the server returns more rows than requested
            all_opportunities.extend(hits[:remaining])
            # Advance by what was actually received so no record is skipped
            start_record += len(hits)

            # Stop before sleeping once everything available (or wanted) is in hand
            hit_count = data.get("hitCount") or 0
            if start_record >= hit_count or len(all_opportunities) >= max_results:
                break

            time.sleep(self.PAGE_DELAY)  # Rate limiting

        if not all_opportunities:
            logger.warning("No opportunities found")
            return pd.DataFrame()

        df = pd.DataFrame(all_opportunities)
        logger.info(f"Fetched {len(df):,} opportunities")
        return df
class GrantMatcher:
    """Match grant opportunities to nonprofits"""

    # Keywords searched (one request series each) by find_oral_health_grants
    ORAL_HEALTH_KEYWORDS = (
        "oral health",
        "dental",
        "fluoridation",
        "tooth decay",
        "dental care",
        "dental hygiene",
        "dentistry",
    )

    def __init__(self, grants_api: "GrantsGovAPI"):
        """
        Args:
            grants_api: API client used to run the searches
        """
        self.api = grants_api

    @staticmethod
    def _first_aln(cfda_list):
        """Return the first Assistance Listing Number in a ``cfdaList`` cell, or None.

        Accepts a list/array whose entries are either plain values or dicts
        carrying a ``cfdaNumber`` key. Missing cells (None, or the float NaN
        pandas substitutes for absent values) yield None.
        """
        if isinstance(cfda_list, str):
            return cfda_list or None
        try:
            entries = list(cfda_list)
        except TypeError:
            # None, NaN and other scalars are not iterable: nothing to report
            return None
        if not entries:
            return None
        first = entries[0]
        if isinstance(first, dict):
            return first.get('cfdaNumber')
        return first

    def find_oral_health_grants(
        self,
        opp_statuses: str = "forecasted|posted"
    ) -> pd.DataFrame:
        """
        Find all oral health related grant opportunities

        Runs one Health-category ("HL") search per keyword, tags each result
        with the keyword that found it, and removes duplicate opportunities.

        Args:
            opp_statuses: Pipe-separated opportunity statuses to include

        Returns:
            DataFrame with oral health grants (empty when nothing matched)
        """
        all_grants = []
        last_index = len(self.ORAL_HEALTH_KEYWORDS) - 1

        for index, keyword in enumerate(self.ORAL_HEALTH_KEYWORDS):
            logger.info(f"Searching for: {keyword}")
            df = self.api.search_to_dataframe(
                keyword=keyword,
                funding_categories="HL",  # Health category
                opp_statuses=opp_statuses,
                max_results=500
            )
            if not df.empty:
                df['search_keyword'] = keyword
                all_grants.append(df)
            # Rate limiting: pause between searches, not after the final one
            if index < last_index:
                time.sleep(1)

        if not all_grants:
            return pd.DataFrame()

        combined = pd.concat(all_grants, ignore_index=True)
        # Remove duplicates by opportunity ID; the first keyword that matched wins
        if 'id' in combined.columns:
            combined = combined.drop_duplicates(subset=['id'])
        else:
            logger.warning("Search results have no 'id' column; duplicates not removed")
        logger.info(f"Found {len(combined):,} unique oral health grants")
        return combined

    def match_grants_to_state(
        self,
        state_code: str,
        grants_df: pd.DataFrame,
        nonprofits_df: pd.DataFrame
    ) -> pd.DataFrame:
        """
        Match grants to nonprofits in a specific state

        Produces one record per grant. No eligibility filtering is done yet:
        ``eligible_nonprofit_count`` is simply the number of nonprofits whose
        ``STATE`` column equals ``state_code``.

        Args:
            state_code: Two-letter state code (e.g., "MA")
            grants_df: DataFrame with grant opportunities
            nonprofits_df: DataFrame with nonprofit organizations; must have
                a ``STATE`` column

        Returns:
            DataFrame with grant matches
        """
        # Filter nonprofits to state
        state_nonprofits = nonprofits_df[nonprofits_df['STATE'] == state_code]
        nonprofit_count = len(state_nonprofits)
        logger.info(f"Matching {len(grants_df):,} grants to {nonprofit_count:,} nonprofits in {state_code}")

        # One timestamp for the whole batch so every record agrees
        stamped_at = datetime.now().isoformat()

        # NOTE(review): these column names are assumed to match the frames
        # passed in; columns that are absent simply produce None values.
        # (In practice, you'd filter by eligibility criteria, NTEE codes, etc.)
        matches = [
            {
                'state': state_code,
                'opportunity_id': grant.get('id'),
                'opportunity_number': grant.get('opportunityNumber'),
                'opportunity_title': grant.get('opportunityTitle'),
                'agency': grant.get('agencyCode'),
                'agency_name': grant.get('agencyName'),
                'posted_date': grant.get('openDate'),
                'close_date': grant.get('closeDate'),
                'status': grant.get('opportunityStatus'),
                'aln': self._first_aln(grant.get('cfdaList')),
                'eligible_nonprofit_count': nonprofit_count,  # Placeholder - need to check actual eligibility
                'last_updated': stamped_at
            }
            for _, grant in grants_df.iterrows()
        ]
        return pd.DataFrame(matches)

    def save_opportunities_by_state(
        self,
        grants_df: pd.DataFrame,
        output_dir: Path = Path("data/gold/grants")
    ) -> None:
        """
        Save grant opportunities as Parquet files

        Despite the name, files are split by agency: one file with every
        opportunity, plus one file per distinct ``agencyCode`` when that
        column exists.

        Args:
            grants_df: DataFrame with grant opportunities
            output_dir: Output directory for grant data (created if missing)
        """
        output_dir.mkdir(parents=True, exist_ok=True)

        # Save all opportunities
        all_path = output_dir / "federal_grant_opportunities.parquet"
        grants_df.to_parquet(all_path, index=False)
        logger.info(f"Saved {len(grants_df):,} opportunities to {all_path}")

        if 'agencyCode' not in grants_df.columns:
            return

        # Also save by agency for easier filtering
        for agency in grants_df['agencyCode'].dropna().unique():
            agency_df = grants_df[grants_df['agencyCode'] == agency]
            # Keep only letters/digits so a code can never add path separators
            safe_name = "".join(ch if ch.isalnum() else "_" for ch in str(agency))
            agency_path = output_dir / f"opportunities_{safe_name}.parquet"
            agency_df.to_parquet(agency_path, index=False)
            logger.info(f"Saved {len(agency_df):,} {agency} opportunities to {agency_path}")
def main():
    """Command-line entry point: run a search and write the results to Parquet.

    With ``--oral-health`` the predefined oral health searches are run, the
    results are saved through ``GrantMatcher`` and a summary is printed.
    Otherwise a single search built from ``--keyword``, ``--funding-category``
    and ``--agency`` is run and saved as ``grant_opportunities.parquet``.
    Nothing is written or printed when a search comes back empty.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Fetch federal grant opportunities from Grants.gov")
    cli.add_argument("--keyword", help="Search keyword")
    cli.add_argument("--funding-category", help="Funding category code (e.g., HL for Health)")
    cli.add_argument("--agency", help="Agency code (e.g., HHS)")
    cli.add_argument("--oral-health", action="store_true", help="Search for oral health grants")
    cli.add_argument("--output", type=Path, default=Path("data/gold/grants"), help="Output directory")
    cli.add_argument("--staging", action="store_true", help="Use staging environment")
    options = cli.parse_args()

    # Build the client and the matcher that wraps it
    client = GrantsGovAPI(use_staging=options.staging)
    matcher = GrantMatcher(client)

    if not options.oral_health:
        # Custom search driven by the command-line filters
        found = client.search_to_dataframe(
            keyword=options.keyword,
            funding_categories=options.funding_category,
            agencies=options.agency,
            max_results=1000
        )
        if found.empty:
            return
        destination = options.output / "grant_opportunities.parquet"
        options.output.mkdir(parents=True, exist_ok=True)
        found.to_parquet(destination, index=False)
        print(f"Saved {len(found):,} opportunities to {destination}")
        return

    # Predefined oral health searches
    oral_health = matcher.find_oral_health_grants()
    if oral_health.empty:
        return

    matcher.save_opportunities_by_state(oral_health, options.output)

    # Summary banner followed by per-column breakdowns
    print(f"\n{'='*60}")
    print(f"Found {len(oral_health):,} oral health grant opportunities")
    print(f"{'='*60}\n")

    breakdowns = (
        ("agencyCode", "By Agency:"),
        ("opportunityStatus", "\nBy Status:"),
    )
    for column, heading in breakdowns:
        if column in oral_health.columns:
            print(heading)
            print(oral_health[column].value_counts())


if __name__ == "__main__":
    main()