Spaces:

CommunityOne
/

open-navigator

Running on CPU Upgrade

App Files Files Community

open-navigator / scripts /datasources /grants_gov /grants_gov_integration.py

jcbowyer

Clean HuggingFace deployment without binary files

61d29fc about 1 month ago

raw

history blame contribute delete

13.6 kB

	"""
	Grants.gov API Integration

	Fetch federal grant opportunities and match them to nonprofits in our database.

	API Documentation: https://www.grants.gov/api
	Endpoints:
	- search2: Search for grant opportunities
	- fetchOpportunity: Get detailed opportunity information

	Key Features:
	- No API key required for search2 and fetchOpportunity
	- Search by keyword, funding category, agency, status
	- Filter by Assistance Listing Number (ALN)

	Use Cases:
	1. Alert nonprofits about relevant grant opportunities
	2. Track oral health funding trends
	3. Match available grants to eligible organizations
	4. Monitor policy changes through grant announcements
	"""

	import requests
	import pandas as pd
	from typing import Dict, List, Optional
	from datetime import datetime
	from pathlib import Path
	from loguru import logger
	import time


	class GrantsGovAPI:
	    """Client for the Grants.gov RESTful API.

	    Wraps the ``search2`` and ``fetchOpportunity`` endpoints behind one shared
	    ``requests.Session`` and adds a paginated search helper that returns the
	    hits as a pandas DataFrame.
	    """

	    BASE_URL = "https://api.grants.gov/v1/api"
	    STAGING_URL = "https://api.staging.grants.gov/v1/api"

	    # Page size requested while paginating in search_to_dataframe().
	    ROWS_PER_REQUEST = 100
	    # Pause between consecutive page requests (simple client-side rate limit).
	    PAGE_DELAY_SECONDS = 0.5

	    def __init__(self, use_staging: bool = False, timeout: float = 30.0):
	        """
	        Initialize Grants.gov API client

	        Args:
	            use_staging: Use staging environment for testing
	            timeout: Seconds to wait for each HTTP request before giving up.
	                Without a timeout, ``requests`` may block forever on a
	                stalled connection.
	        """
	        self.base_url = self.STAGING_URL if use_staging else self.BASE_URL
	        self.timeout = timeout
	        self.session = requests.Session()
	        self.session.headers.update({
	            'Content-Type': 'application/json',
	            'User-Agent': 'CommunityOne/1.0 (Civic Engagement Platform)'
	        })

	    def _post(self, endpoint: str, payload: Dict) -> Dict:
	        """POST ``payload`` as JSON to ``endpoint`` and return the decoded body.

	        Raises:
	            requests.HTTPError: If the server answers with a 4xx/5xx status.
	        """
	        response = self.session.post(
	            f"{self.base_url}/{endpoint}",
	            json=payload,
	            timeout=self.timeout,
	        )
	        response.raise_for_status()
	        return response.json()

	    def search_opportunities(
	        self,
	        keyword: Optional[str] = None,
	        funding_categories: Optional[str] = None,
	        agencies: Optional[str] = None,
	        opp_statuses: str = "forecasted|posted",
	        eligibilities: Optional[str] = None,
	        aln: Optional[str] = None,
	        rows: int = 100,
	        start_record: int = 0
	    ) -> Dict:
	        """
	        Search for grant opportunities

	        Args:
	            keyword: Search keyword (e.g., "oral health", "dental")
	            funding_categories: Funding category codes (e.g., "HL" for Health)
	            agencies: Agency codes (e.g., "HHS", "HHS-NIH")
	            opp_statuses: Pipe-separated statuses
	                (forecasted|posted|closed|archived)
	            eligibilities: Eligibility codes (pipe-separated)
	            aln: Assistance Listing Number (formerly CFDA)
	            rows: Number of results to return (default 100)
	            start_record: Starting record for pagination

	        Returns:
	            The decoded API response. When the response's ``errorcode`` is
	            non-zero the error is logged and the response is returned as-is,
	            so callers must check ``errorcode`` themselves.

	        Raises:
	            requests.HTTPError: If the HTTP request itself fails.

	        Example:
	            >>> api = GrantsGovAPI()
	            >>> results = api.search_opportunities(
	            ...     keyword="oral health",
	            ...     funding_categories="HL",
	            ...     agencies="HHS",
	            ...     opp_statuses="forecasted|posted"
	            ... )
	        """
	        payload = {
	            "rows": rows,
	            "startRecordNum": start_record,
	            "oppStatuses": opp_statuses
	        }

	        # Only send the optional filters the caller actually supplied.
	        optional_filters = {
	            "keyword": keyword,
	            "fundingCategories": funding_categories,
	            "agencies": agencies,
	            "eligibilities": eligibilities,
	            "aln": aln,
	        }
	        payload.update(
	            {name: value for name, value in optional_filters.items() if value}
	        )

	        logger.info(f"Searching Grants.gov: {payload}")

	        data = self._post("search2", payload)

	        if data.get("errorcode") != 0:
	            logger.error(f"API error: {data.get('msg')}")
	            return data

	        # "data" may be present but null, so fall back to an empty dict.
	        hit_count = (data.get("data") or {}).get("hitCount") or 0
	        logger.info(f"Found {hit_count:,} opportunities")

	        return data

	    def fetch_opportunity(self, opportunity_id: int) -> Dict:
	        """
	        Get detailed information about a specific grant opportunity

	        Args:
	            opportunity_id: Opportunity ID from search results

	        Returns:
	            The decoded API response; a non-zero ``errorcode`` is logged but
	            the response is still returned.

	        Raises:
	            requests.HTTPError: If the HTTP request itself fails.
	        """
	        data = self._post("fetchOpportunity", {"opportunityId": opportunity_id})

	        if data.get("errorcode") != 0:
	            logger.error(f"API error: {data.get('msg')}")

	        return data

	    def search_to_dataframe(
	        self,
	        keyword: Optional[str] = None,
	        funding_categories: Optional[str] = None,
	        agencies: Optional[str] = None,
	        opp_statuses: str = "forecasted|posted",
	        max_results: int = 1000,
	        eligibilities: Optional[str] = None,
	        aln: Optional[str] = None
	    ) -> pd.DataFrame:
	        """
	        Search for opportunities and return as DataFrame

	        Pages through ``search_opportunities`` until ``max_results`` rows
	        have been collected, the API reports no more hits, or an API error
	        occurs.

	        Args:
	            keyword: Search keyword
	            funding_categories: Funding category codes
	            agencies: Agency codes
	            opp_statuses: Opportunity statuses (pipe-separated)
	            max_results: Maximum number of results to fetch; the returned
	                frame never has more rows than this
	            eligibilities: Eligibility codes (pipe-separated)
	            aln: Assistance Listing Number

	        Returns:
	            DataFrame with one row per opportunity hit, or an empty
	            DataFrame when nothing was found.
	        """
	        all_opportunities: List[Dict] = []
	        start_record = 0

	        while len(all_opportunities) < max_results:
	            # Never ask for more rows than we are still allowed to keep.
	            remaining = max_results - len(all_opportunities)
	            results = self.search_opportunities(
	                keyword=keyword,
	                funding_categories=funding_categories,
	                agencies=agencies,
	                opp_statuses=opp_statuses,
	                eligibilities=eligibilities,
	                aln=aln,
	                rows=min(self.ROWS_PER_REQUEST, remaining),
	                start_record=start_record
	            )

	            if results.get("errorcode") != 0:
	                break

	            data = results.get("data") or {}
	            hits = data.get("oppHits") or []

	            if not hits:
	                break

	            all_opportunities.extend(hits)

	            # Check if we've fetched all available results
	            hit_count = data.get("hitCount") or 0
	            if len(all_opportunities) >= hit_count:
	                break

	            # Advance by what was actually received so a short page does not
	            # make us skip records.
	            start_record += len(hits)
	            time.sleep(self.PAGE_DELAY_SECONDS)  # Rate limiting

	        if not all_opportunities:
	            logger.warning("No opportunities found")
	            return pd.DataFrame()

	        # Guard against a server that returns more rows than requested.
	        df = pd.DataFrame(all_opportunities[:max_results])
	        logger.info(f"Fetched {len(df):,} opportunities")
	        return df


	class GrantMatcher:
	    """Match grant opportunities to nonprofits"""

	    # Search terms used by find_oral_health_grants(), one API search each.
	    ORAL_HEALTH_KEYWORDS = (
	        "oral health",
	        "dental",
	        "fluoridation",
	        "tooth decay",
	        "dental care",
	        "dental hygiene",
	        "dentistry",
	    )

	    def __init__(self, grants_api: "GrantsGovAPI"):
	        """
	        Args:
	            grants_api: API client used to run the opportunity searches
	        """
	        self.api = grants_api

	    @staticmethod
	    def _first_aln(cfda_list) -> Optional[str]:
	        """Return the first Assistance Listing Number in ``cfda_list``.

	        Accepts entries that are either plain strings or dicts carrying a
	        ``cfdaNumber`` key. Returns None for anything that has no usable
	        first element: None, NaN (which pandas inserts for missing cells and
	        which is truthy), empty sequences, or entries of another type.
	        """
	        if isinstance(cfda_list, (str, bytes)):
	            return None
	        try:
	            first = cfda_list[0]
	        except (TypeError, IndexError, KeyError):
	            return None
	        if isinstance(first, dict):
	            return first.get('cfdaNumber')
	        return first if isinstance(first, str) else None

	    def find_oral_health_grants(
	        self,
	        opp_statuses: str = "forecasted|posted"
	    ) -> pd.DataFrame:
	        """
	        Find all oral health related grant opportunities

	        Runs one Health-category ("HL") search per oral health keyword and
	        merges the results.

	        Args:
	            opp_statuses: Pipe-separated opportunity statuses to include

	        Returns:
	            DataFrame with oral health grants, de-duplicated by ``id`` when
	            that column is present, with a ``search_keyword`` column naming
	            the keyword that produced each row. Empty if nothing matched.
	        """
	        all_grants = []

	        for index, keyword in enumerate(self.ORAL_HEALTH_KEYWORDS):
	            if index:
	                time.sleep(1)  # Rate limiting between keyword searches

	            logger.info(f"Searching for: {keyword}")

	            df = self.api.search_to_dataframe(
	                keyword=keyword,
	                funding_categories="HL",  # Health category
	                opp_statuses=opp_statuses,
	                max_results=500
	            )

	            if not df.empty:
	                df['search_keyword'] = keyword
	                all_grants.append(df)

	        if not all_grants:
	            return pd.DataFrame()

	        combined = pd.concat(all_grants, ignore_index=True)
	        # The same opportunity usually matches several keywords; keep the
	        # first occurrence of each opportunity ID.
	        if 'id' in combined.columns:
	            combined = combined.drop_duplicates(subset=['id'])
	        else:
	            logger.warning("Results have no 'id' column; skipping de-duplication")
	        logger.info(f"Found {len(combined):,} unique oral health grants")
	        return combined

	    def match_grants_to_state(
	        self,
	        state_code: str,
	        grants_df: pd.DataFrame,
	        nonprofits_df: pd.DataFrame
	    ) -> pd.DataFrame:
	        """
	        Match grants to nonprofits in a specific state

	        Args:
	            state_code: Two-letter state code (e.g., "MA")
	            grants_df: DataFrame with grant opportunities
	            nonprofits_df: DataFrame with nonprofit organizations; must have
	                a ``STATE`` column

	        Returns:
	            DataFrame with one row per grant. ``eligible_nonprofit_count`` is
	            currently a placeholder equal to the number of nonprofits in the
	            state; no per-grant eligibility check is performed.
	        """
	        # Filter nonprofits to state
	        state_nonprofits = nonprofits_df[nonprofits_df['STATE'] == state_code]
	        nonprofit_count = len(state_nonprofits)

	        logger.info(f"Matching {len(grants_df):,} grants to {nonprofit_count:,} nonprofits in {state_code}")

	        # One timestamp for the whole batch so all rows agree.
	        last_updated = datetime.now().isoformat()

	        # (In practice, you'd filter by eligibility criteria, NTEE codes, etc.)
	        matches = [
	            {
	                'state': state_code,
	                'opportunity_id': grant.get('id'),
	                'opportunity_number': grant.get('opportunityNumber'),
	                'opportunity_title': grant.get('opportunityTitle'),
	                'agency': grant.get('agencyCode'),
	                'agency_name': grant.get('agencyName'),
	                'posted_date': grant.get('openDate'),
	                'close_date': grant.get('closeDate'),
	                'status': grant.get('opportunityStatus'),
	                'aln': self._first_aln(grant.get('cfdaList')),
	                'eligible_nonprofit_count': nonprofit_count,  # Placeholder - need to check actual eligibility
	                'last_updated': last_updated,
	            }
	            for _, grant in grants_df.iterrows()
	        ]

	        return pd.DataFrame(matches)

	    def save_opportunities_by_state(
	        self,
	        grants_df: pd.DataFrame,
	        output_dir: Path = Path("data/gold/grants")
	    ):
	        """
	        Save grant opportunities to Parquet files

	        Writes every opportunity to ``federal_grant_opportunities.parquet``
	        and, when an ``agencyCode`` column exists, one additional
	        ``opportunities_<agency>.parquet`` file per agency. Despite the
	        method name, the split is by agency, not by state.

	        Args:
	            grants_df: DataFrame with grant opportunities
	            output_dir: Output directory for grant data (created if missing)
	        """
	        output_dir = Path(output_dir)
	        output_dir.mkdir(parents=True, exist_ok=True)

	        # Save all opportunities
	        all_path = output_dir / "federal_grant_opportunities.parquet"
	        grants_df.to_parquet(all_path, index=False)
	        logger.info(f"Saved {len(grants_df):,} opportunities to {all_path}")

	        if 'agencyCode' not in grants_df.columns:
	            return

	        # Also save by agency for easier filtering
	        for agency in grants_df['agencyCode'].dropna().unique():
	            agency_df = grants_df[grants_df['agencyCode'] == agency]
	            # str() guards against non-string agency codes.
	            agency_path = output_dir / f"opportunities_{str(agency).replace('-', '_')}.parquet"
	            agency_df.to_parquet(agency_path, index=False)
	            logger.info(f"Saved {len(agency_df):,} {agency} opportunities to {agency_path}")


	def main():
	    """Command-line entry point.

	    With ``--oral-health`` the predefined oral health search is run, the
	    results are saved through ``GrantMatcher`` and a short summary is
	    printed. Otherwise a single custom search is run from ``--keyword``,
	    ``--funding-category`` and ``--agency`` and written to
	    ``grant_opportunities.parquet`` in the output directory. Nothing is
	    written or printed when a search yields no rows.
	    """
	    import argparse

	    cli = argparse.ArgumentParser(description="Fetch federal grant opportunities from Grants.gov")
	    cli.add_argument("--keyword", help="Search keyword")
	    cli.add_argument("--funding-category", help="Funding category code (e.g., HL for Health)")
	    cli.add_argument("--agency", help="Agency code (e.g., HHS)")
	    cli.add_argument("--oral-health", action="store_true", help="Search for oral health grants")
	    cli.add_argument("--output", type=Path, default=Path("data/gold/grants"), help="Output directory")
	    cli.add_argument("--staging", action="store_true", help="Use staging environment")
	    options = cli.parse_args()

	    # Both objects are created up front, whichever mode is selected.
	    client = GrantsGovAPI(use_staging=options.staging)
	    matcher = GrantMatcher(client)

	    if not options.oral_health:
	        # Custom search driven by the command-line filters.
	        found = client.search_to_dataframe(
	            keyword=options.keyword,
	            funding_categories=options.funding_category,
	            agencies=options.agency,
	            max_results=1000
	        )
	        if found.empty:
	            return

	        target = options.output / "grant_opportunities.parquet"
	        options.output.mkdir(parents=True, exist_ok=True)
	        found.to_parquet(target, index=False)
	        print(f"Saved {len(found):,} opportunities to {target}")
	        return

	    # Predefined oral health search.
	    oral_grants = matcher.find_oral_health_grants()
	    if oral_grants.empty:
	        return

	    matcher.save_opportunities_by_state(oral_grants, options.output)

	    # Summary banner.
	    print(f"\n{'='*60}")
	    print(f"Found {len(oral_grants):,} oral health grant opportunities")
	    print(f"{'='*60}\n")

	    # Per-column breakdowns, printed only for columns the results contain.
	    breakdowns = (
	        ("By Agency:", 'agencyCode'),
	        ("\nBy Status:", 'opportunityStatus'),
	    )
	    for heading, column in breakdowns:
	        if column in oral_grants.columns:
	            print(heading)
	            print(oral_grants[column].value_counts())


	# Run the command-line interface only when this file is executed directly,
	# not when it is imported as a module.
	if __name__ == "__main__":
	main()