Spaces:

CandidAI
/

ask-candid

Running

File size: 15,797 Bytes

from typing import TypedDict, Literal, Annotated, Any
import logging

from langchain_core.tools import tool
import httpx

from ask_candid.tools.utils import format_candid_profile_link
from ask_candid.base.utils import retry_on_status
from ask_candid.base.config.rest import FUNDER_RECOMMENDATION, SEARCH

# Set the output format on the root logger, then create this module's own logger.
# The module logger is pinned to ERROR, so only the API-failure messages emitted
# by the tools below are reported from this module.
logging.basicConfig(format="[%(levelname)s] (%(asctime)s) :: %(message)s")
logger = logging.getLogger(__name__)
logger.setLevel(logging.ERROR)


class OrganizationRecord(TypedDict):
    """Flattened, text-oriented view of a single organization search hit.

    One record is built by `organization_search` for every entry in the search API's `returnedOrgs` array. Each field
    carries its human-readable description in the `Annotated` metadata. List- and mapping-valued fields of the raw
    payload (roles, grant summaries, taxonomy descriptions) are joined into comma separated strings.
    """

    nonprofit_id: Annotated[str, "Unique Candid ID value for the organization"]
    name: Annotated[str, "Name of the organization"]
    aka_name: Annotated[str, "'Also-known-as' name of the organization"]
    acronym: Annotated[str, "Acronym of the name of the organization"]
    city: Annotated[str, "City that the organization is located in"]
    admin1: Annotated[str, "State, province, or canton that the organization is located in"]
    country: Annotated[str, "Country that the organization is located in"]
    ein: Annotated[str, "IRS employer identification number (EIN) of the organization, only relevant for US-based orgs"]
    profile_link: Annotated[str, "Link to the Candid profile for the organization"]
    working_on: Annotated[str, "Description of the subject purpose of the organization"]
    serving: Annotated[str, "Description of the population groups served by the organization"]
    # NOTE(review): populated via `seal.get("description")`, so this can be None at runtime despite the `str` type
    transparency_level: Annotated[str, "Candid Seal level of the organization indicating transparency level"]
    organization_roles: Annotated[str, "Roles of the organization (eg. grantmaker, recipient)"]
    grants_awarded: Annotated[str, "Summary stats of the grants awarded by the organization"]
    grants_received: Annotated[str, "Summary stats of the grants received by the organization"]


@retry_on_status(num_retries=3)
def get_with_retries(url: str, payload: dict[str, Any] | None, headers: dict[str, str] | None) -> httpx.Response:
    """Send a GET request through a short-lived `httpx` client.

    The client uses a transport configured with `retries=3` and a 30 second timeout, and is closed before the
    response is handed back. The call is additionally wrapped by `retry_on_status(num_retries=3)`, which is defined
    elsewhere in the project (presumably it re-issues the request on unsuccessful status codes -- confirm against
    `ask_candid.base.utils`).

    Parameters
    ----------
    url : str
        Fully qualified URL to request
    payload : dict[str, Any] | None
        Query string parameters sent with the request, if any
    headers : dict[str, str] | None
        HTTP headers sent with the request, if any

    Returns
    -------
    httpx.Response
        Response to the GET request
    """

    transport = httpx.HTTPTransport(retries=3)
    with httpx.Client(transport=transport, timeout=30) as session:
        response = session.get(url=url, params=payload, headers=headers)
    return response


def _build_organization_record(org: dict[str, Any]) -> OrganizationRecord:
    """Convert one raw entry of the search API's `returnedOrgs` array into an `OrganizationRecord`.

    Nested containers (`taxonomy`, `seal`, `roles`, `transactionsGiven`, `transactionsReceived`) that are missing or
    null are treated as empty, so a sparsely populated organization does not abort the whole search.
    """

    working_on: list[str] = []
    serving: list[str] = []
    for code, description in (org.get("taxonomy") or {}).items():
        # Codes prefixed with 'P' describe populations served and codes prefixed with 'S' describe subjects.
        # NOTE(review): two-character 'P' codes are skipped, presumably because they are top-level categories -- confirm
        if code.startswith('P') and len(code) > 2:
            serving.append(description.lower())
        elif code.startswith('S'):
            working_on.append(description.lower())

    grants_given: dict = org.get("transactionsGiven") or {}
    grants_received: dict = org.get("transactionsReceived") or {}

    return OrganizationRecord(
        nonprofit_id=org["candidEntityID"],
        name=org["orgName"],
        aka_name=org["akaName"],
        acronym=org["acronymName"],
        city=org["city"],
        admin1=org["admin1"],
        country=org["countryName"],
        ein=org["ein"],
        profile_link=format_candid_profile_link(org['candidEntityID']),
        working_on=f"Working on {', '.join(working_on)}",
        serving=f"Serving population groups {', '.join(serving)}",
        transparency_level=(org.get("seal") or {}).get("description"),
        organization_roles=', '.join(org.get("roles") or []),
        grants_awarded=', '.join([f"{k}: {v}" for k, v in grants_given.items()]),
        grants_received=', '.join([f"{k}: {v}" for k, v in grants_received.items()])
    )


@tool
def organization_search(
    query: str,
    located_postal_code: str | None = None,
    located_admin1: str | None = None,
    search_mode: Literal["organization_only", "organization_and_grants"] | None = "organization_only"
) -> list[OrganizationRecord] | str:
    """Search for organizations by name, description or work, program descriptions and locations. Here are some
    guidelines:
    * `query` controls hybrid searching involving both vector search and keyword search
    * `query` can be used to find organizations based on a description of work
    * if the query is intended to be a lookup of an organization by name, then adding quotes around the `query` string
     circumvents vector search, and prioritizes keyword matching on names (eg. `query=Candid` --> `query='Candid'`)
    * if the query is an EIN (eg. 12-3456789) then keyword searching is prioritized to get exact matches
    * adding location information such as postal codes and/or admin1 (state/province abbreviations) will filter results

    This tool should be used as a first step in any downstream task which requires identifying the nonprofit that the
    user is identifying with. Often, the `nonprofit_id` is required, and that can be found via a search.

    Parameters
    ----------
    query : str
        Free text query which drives the search functionality. This uses a hybrid approach of vector and keyword
        searching, but under certain conditions expressed in the 'guidelines' this may disable vector search.
    located_postal_code : str | None, optional
        Postal code of the organization to be searched, if provided, by default None
    located_admin1 : str | None, optional
        Admin1 code (state/province abbreviation) of the organization to be searched, if provided, by default None
    search_mode : Literal["organization_only", "organization_and_grants"] | None, optional
        Choose how to search for organizations, if `None` or "organization_and_grants" then this will examine evidence
        at the organization level as well as at the historical grant transaction level capturing activity evidence. For
        name lookups it is best to use the "organization_only" default value, by default "organization_only"

    Returns
    -------
    list[OrganizationRecord] | str
        List of the top organization search results
        If output is a string then that means there was some error, and retry should be considered
    """

    payload: dict[str, Any] = {"query": query, "searchMode": search_mode, "rowCount": 5}
    if located_postal_code is not None:
        payload["postalCode"] = located_postal_code
    if located_admin1 is not None:
        payload["admin1"] = located_admin1

    try:
        with httpx.Client(transport=httpx.HTTPTransport(retries=3), timeout=30) as client:
            r = client.get(
                url=SEARCH.endpoint("v1/search"),
                params=payload,
                headers={**SEARCH.header} # type: ignore
            )
    except httpx.RequestError as ex:
        # Timeouts and connection failures are reported through the documented "string means error" contract
        # instead of escaping the tool as an exception.
        logger.error("Error calling organization search API. Error: %s", ex)
        return f"Error calling organization search. Error: {ex}"

    if r.status_code != 200:
        logger.error("Error calling organization search API %s. Error: %s", str(r.request.url), r.reason_phrase)
        return f"Error calling organization search. Error: {r.reason_phrase}"

    data: dict = r.json()
    # `returnedOrgs` may be absent or null when nothing matched
    return [_build_organization_record(org) for org in (data.get("returnedOrgs") or [])]


@tool
def recommend_funders(
    nonprofit_id: int,
    subject_codes_of_program: str | None = None,
    populations_served_codes_of_program: str | None = None,
    geonameids_of_geographies_served: str | None = None,
    include_past_funders: bool = False
) -> tuple[dict[str, Any], list[dict[str, Any]]] | str:
    """Recommend potential funding organizations to a nonprofit seeking a grant.

    These recommendations are built using machine learning over a heterogeneous knowledge graph representing the work of
    the requesting organization, and the contextual recent activities of potential funders, and their grant recipients.

    While extra subject codes, populations served codes, and geography IDs for where the program takes place are not
    required, recommendations tend to improve and become more specific the more information can be provided.

    Subjects and populations can be determined using the `autocode` tool if the requester can supply a description of
    the program they are seeking funding for.

    Geographies can be determined using the geo detection tool if the requester can supply a description of the program
    they are seeking funding for.

    Key Usage Requirements:
    - Always incorporate returned profile URLs directly into the response text
    - Replace funding organization name mentions with hyperlinked Candid profile URLs
    - Prioritize creating a seamless user experience by making URLs contextually relevant
    - Use relevant recipient data as well as inferred metadata to provide explanations about recommendation relevance

    Parameters
    ----------
    nonprofit_id : int
        The unique identifier of the requesting organization. This will need to be found from a search using inputs
        elicited from the requester
    subject_codes_of_program : str | None, optional
        Subject codes from Candid's PCS taxonomy, comma separated, by default None
    populations_served_codes_of_program : str | None, optional
        Population groups served codes from Candid's PCS taxonomy, comma separated, by default None
    geonameids_of_geographies_served : str | None, optional
        Geonames ID values for geographies served by the requester's program, comma separated, by default None
    include_past_funders : bool, optional
        Boolean flag to indicate whether previous funders of the input organization identified by the `nonprofit_id`
        should be included. If the requester would like to reconsider previous funding organizations then set this to
        `True`, but the requester MUST be prompted to indicate this preference. Using the default value will help the
        requester discover new, potentially relevant funders, by default False

    Examples
    --------
    >>> recommend_funders(nonprofit_id=9981881)
    >>> recommend_funders(
        nonprofit_id=9173173,
        subject_codes_of_program='SS050000, SS000000,SB050000',
        populations_served_codes_of_program='PJ050100',
        geonameids_of_geographies_served='4094212,4094212'
    )

    Returns
    -------
    tuple[dict[str, Any], list[dict[str, Any]]] | str
        (Inferred data used to generate recommendations, array of funders being recommended)
        If output is a string then that means there was some error, and retry should be considered
    """

    payload: dict[str, Any] = {
        "candid_entity_id": nonprofit_id,
        "use_programs": True,
        "top_k": 5,
        "include_past_funders": include_past_funders
    }

    # Forward an optional filter only when it carries a value. All three filters are treated alike, so an empty
    # string is handled like `None` instead of being sent as an empty query parameter.
    optional_filters = {
        "subjects": subject_codes_of_program,
        "populations": populations_served_codes_of_program,
        "geos": geonameids_of_geographies_served
    }
    payload.update({name: value for name, value in optional_filters.items() if value})

    r = get_with_retries(
        url=FUNDER_RECOMMENDATION.endpoint("funder/pcs-v3"),
        payload=payload,
        headers={**FUNDER_RECOMMENDATION.header}
    )
    # Narrows the type for static checkers; the return type of the decorated helper is not visible from here
    assert isinstance(r, httpx.Response)
    if r.status_code != 200:
        logger.error("Error calling funder recommendations API %s. Error: %s", str(r.request.url), r.reason_phrase)
        return f"Error calling funder recommendations. Error: {r.reason_phrase}"

    data: dict = r.json()
    meta = data.get("meta") or {}
    # Every recommendation is passed through unchanged, plus a link to the funder's Candid profile
    funders = [
        {**rec, "profile_link": format_candid_profile_link(rec['funder_id'])}
        for rec in (data.get("recommendations") or [])
    ]
    return meta, funders


@tool
def recommend_funding_opportunities(
    nonprofit_id: int,
    subject_codes_of_program: str | None = None,
    populations_served_codes_of_program: str | None = None,
    geonameids_of_geographies_served: str | None = None
) -> tuple[dict[str, Any], list[dict[str, Any]]] | str:
    """Recommend active funding opportunities (RFPs) to a nonprofit seeking a grant.

    These recommendations are built using machine learning over a heterogeneous knowledge graph representing the work of
    the requesting organization, and the contextual recent activities of potential funders, and their grant recipients.

    While extra subject codes, populations served codes, and geography IDs for where the program takes place are not
    required, recommendations tend to improve and become more specific the more information can be provided.

    Subjects and populations can be determined using the `autocode` tool if the requester can supply a description of
    the program they are seeking funding for.

    Key Usage Requirements:
    - Always incorporate returned profile URLs directly into the response text
    - Replace funding organization name mentions with hyperlinked Candid profile URLs
    - Prioritize creating a seamless user experience by making URLs contextually relevant
    - Use inferred metadata to provide explanations about recommendation relevance

    Parameters
    ----------
    nonprofit_id : int
        The unique identifier of the requesting organization. This will need to be found from a search using inputs
        elicited from the requester
    subject_codes_of_program : str | None, optional
        Subject codes from Candid's PCS taxonomy, comma separated, by default None
    populations_served_codes_of_program : str | None, optional
        Population groups served codes from Candid's PCS taxonomy, comma separated, by default None
    geonameids_of_geographies_served : str | None, optional
        Geonames ID values for geographies served by the requester's program, comma separated, by default None

    Examples
    --------
    >>> recommend_funding_opportunities(nonprofit_id=9981881)
    >>> recommend_funding_opportunities(
        nonprofit_id=9173173,
        subject_codes_of_program='SS050000, SS000000,SB050000',
        populations_served_codes_of_program='PJ050100',
        geonameids_of_geographies_served='4094212,4094212'
    )

    Returns
    -------
    tuple[dict[str, Any], list[dict[str, Any]]] | str
        (Inferred data used to generate recommendations, array of active funding opportunities being recommended)
        If output is a string then that means there was some error, and retry should be considered
    """

    payload: dict[str, Any] = {"candid_entity_id": nonprofit_id, "use_programs": True, "top_k": 5}

    # Forward an optional filter only when it carries a value. All three filters are treated alike, so an empty
    # string is handled like `None` instead of being sent as an empty query parameter.
    optional_filters = {
        "subjects": subject_codes_of_program,
        "populations": populations_served_codes_of_program,
        "geos": geonameids_of_geographies_served
    }
    payload.update({name: value for name, value in optional_filters.items() if value})

    r = get_with_retries(
        url=FUNDER_RECOMMENDATION.endpoint("rfp/pcs-v3"),
        payload=payload,
        headers={**FUNDER_RECOMMENDATION.header}
    )
    # Narrows the type for static checkers; the return type of the decorated helper is not visible from here
    assert isinstance(r, httpx.Response)
    if r.status_code != 200:
        logger.error("Error calling RFP recommendation API %s. Error: %s", str(r.request.url), r.reason_phrase)
        return f"Error calling RFP recommendations. Error: {r.reason_phrase}"

    data: dict = r.json()
    meta = data.get("meta") or {}
    # Every recommendation is passed through unchanged, plus a link to the funder's Candid profile
    opportunities = [
        {**rec, "profile_link": format_candid_profile_link(rec['funder_id'])}
        for rec in (data.get("recommendations") or [])
    ]
    return meta, opportunities