"""
Grants.gov API Integration

Fetch federal grant opportunities and match them to nonprofits in our database.

API Documentation: https://www.grants.gov/api
Endpoints:
- search2: Search for grant opportunities
- fetchOpportunity: Get detailed opportunity information

Key Features:
- No API key required for search2 and fetchOpportunity
- Search by keyword, funding category, agency, status
- Filter by Assistance Listing Number (ALN)

Use Cases:
1. Alert nonprofits about relevant grant opportunities
2. Track oral health funding trends
3. Match available grants to eligible organizations
4. Monitor policy changes through grant announcements
"""

import requests
import pandas as pd
from typing import Dict, List, Optional
from datetime import datetime
from pathlib import Path
from loguru import logger
import time


class GrantsGovAPI:
    """Client for Grants.gov RESTful API

    Wraps the ``search2`` and ``fetchOpportunity`` endpoints. All requests are
    sent as JSON POSTs through one shared ``requests.Session`` and carry an
    explicit timeout so a stalled connection cannot block the caller forever.
    """
    
    BASE_URL = "https://api.grants.gov/v1/api"
    STAGING_URL = "https://api.staging.grants.gov/v1/api"
    # Seconds to wait for the server before a request is abandoned.
    DEFAULT_TIMEOUT = 30.0
    
    def __init__(self, use_staging: bool = False, timeout: float = DEFAULT_TIMEOUT):
        """
        Initialize Grants.gov API client
        
        Args:
            use_staging: Use staging environment for testing
            timeout: Seconds to wait for each HTTP request before giving up
        """
        self.base_url = self.STAGING_URL if use_staging else self.BASE_URL
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            'Content-Type': 'application/json',
            'User-Agent': 'CommunityOne/1.0 (Civic Engagement Platform)'
        })

    def _post(self, endpoint: str, payload: Dict) -> Dict:
        """
        POST a JSON payload to an endpoint and return the decoded response
        
        Args:
            endpoint: Endpoint name appended to the base URL (e.g., "search2")
            payload: JSON-serializable request body
            
        Returns:
            Decoded JSON response. A non-zero ``errorcode`` is logged but the
            response is still returned so callers can inspect it.
            
        Raises:
            requests.HTTPError: If the server answers with an HTTP error status
        """
        response = self.session.post(
            f"{self.base_url}/{endpoint}",
            json=payload,
            timeout=self.timeout
        )
        response.raise_for_status()
        
        data = response.json()
        
        if data.get("errorcode") != 0:
            logger.error(f"API error: {data.get('msg')}")
            
        return data
        
    def search_opportunities(
        self,
        keyword: Optional[str] = None,
        funding_categories: Optional[str] = None,
        agencies: Optional[str] = None,
        opp_statuses: str = "forecasted|posted",
        eligibilities: Optional[str] = None,
        aln: Optional[str] = None,
        rows: int = 100,
        start_record: int = 0
    ) -> Dict:
        """
        Search for grant opportunities
        
        Args:
            keyword: Search keyword (e.g., "oral health", "dental")
            funding_categories: Funding category codes (e.g., "HL" for Health)
            agencies: Agency codes (e.g., "HHS", "HHS-NIH")
            opp_statuses: Pipe-separated statuses (forecasted|posted|closed|archived)
            eligibilities: Eligibility codes (pipe-separated)
            aln: Assistance Listing Number (formerly CFDA)
            rows: Number of results to return (default 100)
            start_record: Starting record for pagination
            
        Returns:
            API response with search results. When the API reports a non-zero
            ``errorcode`` the response is returned as-is after logging.
            
        Raises:
            requests.HTTPError: If the server answers with an HTTP error status
            
        Example:
            >>> api = GrantsGovAPI()
            >>> results = api.search_opportunities(
            ...     keyword="oral health",
            ...     funding_categories="HL",
            ...     agencies="HHS",
            ...     opp_statuses="forecasted|posted"
            ... )
        """
        payload = {
            "rows": rows,
            "startRecordNum": start_record,
            "oppStatuses": opp_statuses
        }
        
        # Only send the optional filters the caller actually supplied
        optional_filters = {
            "keyword": keyword,
            "fundingCategories": funding_categories,
            "agencies": agencies,
            "eligibilities": eligibilities,
            "aln": aln
        }
        payload.update({name: value for name, value in optional_filters.items() if value})
            
        logger.info(f"Searching Grants.gov: {payload}")
        
        data = self._post("search2", payload)
        
        if data.get("errorcode") == 0:
            # "data" / "hitCount" may be present but null; treat that as zero hits
            hit_count = (data.get("data") or {}).get("hitCount") or 0
            logger.info(f"Found {hit_count:,} opportunities")
        
        return data
    
    def fetch_opportunity(self, opportunity_id: int) -> Dict:
        """
        Get detailed information about a specific grant opportunity
        
        Args:
            opportunity_id: Opportunity ID from search results
            
        Returns:
            Detailed opportunity information (returned even when the API
            reports a non-zero ``errorcode``, which is logged)
            
        Raises:
            requests.HTTPError: If the server answers with an HTTP error status
        """
        return self._post("fetchOpportunity", {"opportunityId": opportunity_id})
    
    def search_to_dataframe(
        self,
        keyword: Optional[str] = None,
        funding_categories: Optional[str] = None,
        agencies: Optional[str] = None,
        opp_statuses: str = "forecasted|posted",
        max_results: int = 1000,
        eligibilities: Optional[str] = None,
        aln: Optional[str] = None
    ) -> pd.DataFrame:
        """
        Search for opportunities and return as DataFrame
        
        Pages through ``search_opportunities`` until ``max_results`` rows have
        been collected, the API runs out of hits, or the API reports an error
        (in which case the rows gathered so far are returned).
        
        Args:
            keyword: Search keyword
            funding_categories: Funding category codes
            agencies: Agency codes
            opp_statuses: Opportunity statuses
            max_results: Maximum number of results to fetch; the returned
                DataFrame never has more rows than this
            eligibilities: Eligibility codes (pipe-separated)
            aln: Assistance Listing Number (formerly CFDA)
            
        Returns:
            DataFrame with opportunity information (empty if nothing matched)
        """
        all_opportunities: List[Dict] = []
        start_record = 0
        rows_per_request = 100
        
        while len(all_opportunities) < max_results:
            remaining = max_results - len(all_opportunities)
            
            results = self.search_opportunities(
                keyword=keyword,
                funding_categories=funding_categories,
                agencies=agencies,
                opp_statuses=opp_statuses,
                eligibilities=eligibilities,
                aln=aln,
                # Never ask for more rows than we are still allowed to keep
                rows=min(rows_per_request, remaining),
                start_record=start_record
            )
            
            if results.get("errorcode") != 0:
                break
                
            data = results.get("data") or {}
            hits = data.get("oppHits") or []
            
            if not hits:
                break
                
            # Trim in case the server returns more rows than requested
            all_opportunities.extend(hits[:remaining])
            
            # Advance by what was actually returned, not by the page size
            start_record += len(hits)
            
            # Stop once the cap is reached or every available hit was fetched
            hit_count = data.get("hitCount") or 0
            if len(all_opportunities) >= max_results or start_record >= hit_count:
                break
                
            time.sleep(0.5)  # Rate limiting
            
        # Convert to DataFrame
        if not all_opportunities:
            logger.warning("No opportunities found")
            return pd.DataFrame()
            
        df = pd.DataFrame(all_opportunities)
        logger.info(f"Fetched {len(df):,} opportunities")
        return df


class GrantMatcher:
    """Match grant opportunities to nonprofits"""
    
    # Keywords searched (one request series each) by find_oral_health_grants
    ORAL_HEALTH_KEYWORDS = (
        "oral health",
        "dental",
        "fluoridation",
        "tooth decay",
        "dental care",
        "dental hygiene",
        "dentistry"
    )
    
    def __init__(self, grants_api: "GrantsGovAPI"):
        """
        Args:
            grants_api: API client used to run the opportunity searches
        """
        self.api = grants_api
        
    @staticmethod
    def _first_aln(cfda_list) -> Optional[str]:
        """Return the first Assistance Listing Number in a ``cfdaList`` cell, or None."""
        # The cell may be a list, a numpy array (after a parquet round trip),
        # None, or NaN; scalars are not iterable and simply mean "no ALN".
        try:
            first = next(iter(cfda_list), None)
        except TypeError:
            return None
        if isinstance(first, dict):
            return first.get('cfdaNumber')
        # Some payloads list the numbers directly as strings
        return first if isinstance(first, str) else None
    
    @staticmethod
    def _field(grant: pd.Series, *names: str):
        """Return the value of the first of ``names`` present in the row, or None."""
        for name in names:
            if name in grant:
                return grant[name]
        return None
        
    def find_oral_health_grants(
        self,
        opp_statuses: str = "forecasted|posted"
    ) -> pd.DataFrame:
        """
        Find all oral health related grant opportunities
        
        Runs one Health-category ("HL") search per keyword in
        ``ORAL_HEALTH_KEYWORDS`` and merges the results.
        
        Args:
            opp_statuses: Pipe-separated opportunity statuses to include
        
        Returns:
            DataFrame with oral health grants, de-duplicated by opportunity ID
            and tagged with the ``search_keyword`` that first found each row.
            Empty if no keyword matched anything.
        """
        all_grants = []
        
        for position, keyword in enumerate(self.ORAL_HEALTH_KEYWORDS):
            if position:
                time.sleep(1)  # Rate limiting between keyword searches
                
            logger.info(f"Searching for: {keyword}")
            
            df = self.api.search_to_dataframe(
                keyword=keyword,
                funding_categories="HL",  # Health category
                opp_statuses=opp_statuses,
                max_results=500
            )
            
            if not df.empty:
                df['search_keyword'] = keyword
                all_grants.append(df)
            
        if not all_grants:
            return pd.DataFrame()
            
        combined = pd.concat(all_grants, ignore_index=True)
        # Remove duplicates by opportunity ID (the same grant can match
        # several keywords); skip if the API response carried no ID column
        if 'id' in combined.columns:
            combined = combined.drop_duplicates(subset=['id'])
        else:
            logger.warning("No 'id' column in search results; duplicates not removed")
        logger.info(f"Found {len(combined):,} unique oral health grants")
        return combined
    
    def match_grants_to_state(
        self,
        state_code: str,
        grants_df: pd.DataFrame,
        nonprofits_df: pd.DataFrame
    ) -> pd.DataFrame:
        """
        Match grants to nonprofits in a specific state
        
        Args:
            state_code: Two-letter state code (e.g., "MA")
            grants_df: DataFrame with grant opportunities
            nonprofits_df: DataFrame with nonprofit organizations; must have
                a ``STATE`` column
            
        Returns:
            DataFrame with one row per grant. ``eligible_nonprofit_count`` is
            currently the number of nonprofits in the state, not a real
            eligibility check.
            
        Raises:
            KeyError: If ``nonprofits_df`` has no ``STATE`` column
        """
        # Only the count of in-state nonprofits is used below
        nonprofit_count = int((nonprofits_df['STATE'] == state_code).sum())
        
        logger.info(f"Matching {len(grants_df):,} grants to {nonprofit_count:,} nonprofits in {state_code}")
        
        # One timestamp for the whole batch so every row agrees
        last_updated = datetime.now().isoformat()
        
        # One record per grant
        # (In practice, you'd filter by eligibility criteria, NTEE codes, etc.)
        # NOTE(review): search2 hits appear to use the short names `number`,
        # `title`, `agency` and `oppStatus`; they are read only as fallbacks
        # when the long names are absent - confirm against the API docs.
        matches = [
            {
                'state': state_code,
                'opportunity_id': grant.get('id'),
                'opportunity_number': self._field(grant, 'opportunityNumber', 'number'),
                'opportunity_title': self._field(grant, 'opportunityTitle', 'title'),
                'agency': grant.get('agencyCode'),
                'agency_name': self._field(grant, 'agencyName', 'agency'),
                'posted_date': grant.get('openDate'),
                'close_date': grant.get('closeDate'),
                'status': self._field(grant, 'opportunityStatus', 'oppStatus'),
                'aln': self._first_aln(grant.get('cfdaList')),
                'eligible_nonprofit_count': nonprofit_count,  # Placeholder - need to check actual eligibility
                'last_updated': last_updated
            }
            for _, grant in grants_df.iterrows()
        ]
            
        return pd.DataFrame(matches)
    
    def save_opportunities_by_state(
        self,
        grants_df: pd.DataFrame,
        output_dir: Path = Path("data/gold/grants")
    ):
        """
        Save grant opportunities to parquet files
        
        Writes every opportunity to ``federal_grant_opportunities.parquet`` and,
        when an ``agencyCode`` column exists, one additional file per agency.
        Despite the method name, the split is by agency code, not by state.
        
        Args:
            grants_df: DataFrame with grant opportunities
            output_dir: Output directory for grant data (created if missing)
        """
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        
        # Save all opportunities
        all_path = output_dir / "federal_grant_opportunities.parquet"
        grants_df.to_parquet(all_path, index=False)
        logger.info(f"Saved {len(grants_df):,} opportunities to {all_path}")
        
        if 'agencyCode' not in grants_df.columns:
            return
            
        # Also save by agency for easier filtering (groupby skips missing codes)
        for agency, agency_df in grants_df.groupby('agencyCode', sort=False):
            # Keep only filename-safe characters so a code can never inject a
            # path separator; "HHS-NIH" still becomes "HHS_NIH" as before
            safe_agency = "".join(ch if ch.isalnum() else "_" for ch in str(agency))
            agency_path = output_dir / f"opportunities_{safe_agency}.parquet"
            agency_df.to_parquet(agency_path, index=False)
            logger.info(f"Saved {len(agency_df):,} {agency} opportunities to {agency_path}")


def main():
    """Command-line entry point.

    With ``--oral-health`` the predefined oral health searches are run, the
    results are saved under the output directory and a per-agency / per-status
    summary is printed. Otherwise a single custom search is run from the
    ``--keyword``, ``--funding-category`` and ``--agency`` options and its
    results are written to ``grant_opportunities.parquet``. Nothing is written
    or printed when a search returns no rows.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Fetch federal grant opportunities from Grants.gov")
    cli.add_argument("--keyword", help="Search keyword")
    cli.add_argument("--funding-category", help="Funding category code (e.g., HL for Health)")
    cli.add_argument("--agency", help="Agency code (e.g., HHS)")
    cli.add_argument("--oral-health", action="store_true", help="Search for oral health grants")
    cli.add_argument("--output", type=Path, default=Path("data/gold/grants"), help="Output directory")
    cli.add_argument("--staging", action="store_true", help="Use staging environment")

    options = cli.parse_args()

    # Build the client and the matcher that wraps it
    client = GrantsGovAPI(use_staging=options.staging)
    grant_matcher = GrantMatcher(client)

    if not options.oral_health:
        # Custom search driven by the command-line filters
        found = client.search_to_dataframe(
            keyword=options.keyword,
            funding_categories=options.funding_category,
            agencies=options.agency,
            max_results=1000
        )
        if found.empty:
            return

        destination = options.output / "grant_opportunities.parquet"
        options.output.mkdir(parents=True, exist_ok=True)
        found.to_parquet(destination, index=False)
        print(f"Saved {len(found):,} opportunities to {destination}")
        return

    # Predefined oral health search
    oral_health_df = grant_matcher.find_oral_health_grants()
    if oral_health_df.empty:
        return

    grant_matcher.save_opportunities_by_state(oral_health_df, options.output)

    # Summary banner
    print(f"\n{'='*60}")
    print(f"Found {len(oral_health_df):,} oral health grant opportunities")
    print(f"{'='*60}\n")

    # Breakdown tables, each shown only when its column is present
    breakdowns = (
        ('agencyCode', "By Agency:"),
        ('opportunityStatus', "\nBy Status:"),
    )
    for column, heading in breakdowns:
        if column in oral_health_df.columns:
            print(heading)
            print(oral_health_df[column].value_counts())


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()