Spaces:
Running
Running
| from typing import TypedDict, Literal, Annotated, Any | |
| import logging | |
| from langchain_core.tools import tool | |
| import httpx | |
| from ask_candid.tools.utils import format_candid_profile_link | |
| from ask_candid.base.utils import retry_on_status | |
| from ask_candid.base.config.rest import FUNDER_RECOMMENDATION, SEARCH | |
# Configure the root logger's output format: "[LEVEL] (timestamp) :: message".
# Per the stdlib docs, `basicConfig` does nothing if the root logger already has handlers configured.
logging.basicConfig(format="[%(levelname)s] (%(asctime)s) :: %(message)s")
# Module-level logger used by the functions in this file; only ERROR and above are emitted through it.
logger = logging.getLogger(__name__)
logger.setLevel(logging.ERROR)
class OrganizationRecord(TypedDict):
    """Flattened view of a single organization as produced by `organization_search`.

    Every field is annotated with a short human-readable description carried as `Annotated` metadata.
    """

    # --- identity ---
    nonprofit_id: Annotated[str, "Unique Candid ID value for the organization"]
    name: Annotated[str, "Name of the organization"]
    aka_name: Annotated[str, "'Also-known-as' name of the organization"]
    acronym: Annotated[str, "Acronym of the name of the organization"]

    # --- location ---
    city: Annotated[str, "City that the organization is located in"]
    admin1: Annotated[str, "State, province, or canton that the organization is located in"]
    country: Annotated[str, "Country that the organization is located in"]

    # --- external references ---
    ein: Annotated[
        str,
        "IRS employer identification number (EIN) of the organization, only relevant for US-based orgs"
    ]
    profile_link: Annotated[str, "Link to the Candid profile for the organization"]

    # --- what the organization does, and for whom ---
    working_on: Annotated[str, "Description of the subject purpose of the organization"]
    serving: Annotated[str, "Description of the population groups served by the organization"]

    # --- transparency, roles and grant activity ---
    transparency_level: Annotated[str, "Candid Seal level of the organization indicating transparency level"]
    organization_roles: Annotated[str, "Roles of the organization (eg. grantmaker, recipient)"]
    grants_awarded: Annotated[str, "Summary stats of the grants awarded by the organization"]
    grants_received: Annotated[str, "Summary stats of the grants received by the organization"]
def get_with_retries(url: str, payload: dict[str, Any] | None, headers: dict[str, str] | None) -> httpx.Response:
    """Send a GET request through a short-lived HTTP client and hand back the raw response.

    The client is built on an `httpx.HTTPTransport` configured with `retries=3` and uses a 30 second timeout.
    The response is returned as-is whatever its status code; nothing is raised here for non-200 responses.

    Parameters
    ----------
    url : str
        Fully-qualified URL to request
    payload : dict[str, Any] | None
        Values sent as URL query parameters
    headers : dict[str, str] | None
        HTTP headers sent with the request

    Returns
    -------
    httpx.Response
        The response object for the request
    """

    transport = httpx.HTTPTransport(retries=3)
    with httpx.Client(transport=transport, timeout=30) as session:
        response = session.get(url=url, params=payload, headers=headers)
    return response
def organization_search(
    query: str,
    located_postal_code: str | None = None,
    located_admin1: str | None = None,
    search_mode: Literal["organization_only", "organization_and_grants"] | None = "organization_only"
) -> list[OrganizationRecord] | str:
    """Search for organizations by name, description or work, program descriptions and locations. Here are some
    guidelines:
    * `query` controls hybrid searching involving both vector search and keyword search
    * `query` can be used to find organizations based on a description of work
    * if the query is intended to be a lookup of an organization by name, then adding quotes around the `query` string
    circumvents vector search, and prioritizes keyword matching on names (eg. `query=Candid` --> `query='Candid'`)
    * if the query is an EIN (eg. 12-3456789) then keyword searching is prioritized to get exact matches
    * adding location information such as postal codes and/or admin1 (state/province abbreviations) will filter results

    This tool should be used as a first step in any downstream task which requires identifying the nonprofit that the
    user is identifying with. Often, the `nonprofit_id` is required, and that can be found via a search.

    Parameters
    ----------
    query : str
        Free text query which drives the search functionality. This uses a hybrid approach of vector and keyword
        searching, but under certain conditions expressed in the 'guidelines' this may disable vector search.
    located_postal_code : str | None, optional
        Postal code of the organization to be searched, if provided, by default None
    located_admin1 : str | None, optional
        Admin1 code (state/province abbreviation) of the organization to be searched, if provided, by default None
    search_mode : Literal["organization_only", "organization_and_grants"] | None, optional
        Choose how to search for organizations, if `None` or "organization_and_grants" then this will examine evidence
        at the organization level as well as at the historical grant transaction level capturing activity evidence. For
        name lookups it is best to use the "organization_only" default value, by default "organization_only"

    Returns
    -------
    list[OrganizationRecord] | str
        List of the top organization search results
        If output is a string then that means there was some error, and retry should be considered
    """

    payload: dict[str, Any] = {"query": query, "searchMode": search_mode, "rowCount": 5}
    if located_postal_code is not None:
        payload["postalCode"] = located_postal_code
    if located_admin1 is not None:
        payload["admin1"] = located_admin1

    # Go through the shared helper (same transport retries and timeout as before) rather than duplicating the
    # client construction here; this matches how the recommendation functions issue their requests.
    r = get_with_retries(
        url=SEARCH.endpoint("v1/search"),
        payload=payload,
        headers={**SEARCH.header} # type: ignore
    )

    if r.status_code != 200:
        logger.error("Error calling organization search API %s. Error: %s", r.request.url, r.reason_phrase)
        return f"Error calling organization search. Error: {r.reason_phrase}"

    data: dict = r.json()
    output: list[OrganizationRecord] = []

    for org in data.get("returnedOrgs") or []:
        # Split the taxonomy into population codes ('P…', longer than 2 characters) and subject codes ('S…').
        # A missing or null taxonomy is treated as empty instead of aborting the whole search.
        working_on: list[str] = []
        serving: list[str] = []
        for code, description in (org.get("taxonomy") or {}).items():
            if code.startswith('P') and len(code) > 2:
                serving.append(description.lower())
            elif code.startswith('S'):
                working_on.append(description.lower())

        # Nested containers may be absent or null for sparse records; fall back to empty values so that one
        # incomplete organization does not fail the entire result set.
        seal = org.get("seal") or {}
        roles = org.get("roles") or []
        given = org.get("transactionsGiven") or {}
        received = org.get("transactionsReceived") or {}

        output.append(OrganizationRecord(
            nonprofit_id=org["candidEntityID"],
            name=org["orgName"],
            aka_name=org["akaName"],
            acronym=org["acronymName"],
            city=org["city"],
            admin1=org["admin1"],
            country=org["countryName"],
            ein=org["ein"],
            profile_link=format_candid_profile_link(org['candidEntityID']),
            working_on=f"Working on {', '.join(working_on)}",
            serving=f"Serving population groups {', '.join(serving)}",
            transparency_level=seal.get("description"),
            organization_roles=', '.join(roles),
            grants_awarded=', '.join([f"{k}: {v}" for k, v in given.items()]),
            grants_received=', '.join([f"{k}: {v}" for k, v in received.items()])
        ))
    return output
def recommend_funders(
    nonprofit_id: int,
    subject_codes_of_program: str | None = None,
    populations_served_codes_of_program: str | None = None,
    geonameids_of_geographies_served: str | None = None,
    include_past_funders: bool = False
) -> tuple[dict[str, Any], list[dict[str, Any]]] | str:
    """Recommend potential funding organizations to a nonprofit seeking a grant.

    These recommendations are built using machine learning over a heterogeneous knowledge graph representing the work of
    the requesting organization, and the contextual recent activities of potential funders, and their grant recipients.

    While extra subject codes, populations served codes, and geography IDs for where the program takes place are not
    required, recommendations tend to improve and become more specific the more information can be provided.

    Subjects and populations can be determined using the `autocode` tool if the requester can supply a description of
    the program they are seeking funding for.

    Geographies can be determined using the geo detection tool if the requester can supply a description of the program
    they are seeking funding for.

    Key Usage Requirements:
    - Always incorporate returned profile URLs directly into the response text
    - Replace funding organization name mentions with hyperlinked Candid profile URLs
    - Prioritize creating a seamless user experience by making URLs contextually relevant
    - Use relevant recipient data as well as inferred metadata to provide explanations about recommendation relevance

    Parameters
    ----------
    nonprofit_id : int
        The unique identifier of the requesting organization. This will need to be found from a search using inputs
        elicited from the requester
    subject_codes_of_program : str | None, optional
        Subject codes from Candid's PCS taxonomy, comma separated, by default None
    populations_served_codes_of_program : str | None, optional
        Population groups served codes from Candid's PCS taxonomy, comma separated, by default None
    geonameids_of_geographies_served : str | None, optional
        Geonames ID values for geographies served by the requester's program, comma separated, by default None
    include_past_funders : bool, optional
        Boolean flag to indicate whether previous funders of the input organization identified by the `nonprofit_id`
        should be included. If the requester would like to reconsider previous funding organizations then set this to
        `True`, but the requester MUST be prompted to indicate this preference. Using the default value will help the
        requester discover new, potentially relevant funders, by default False

    Examples
    --------
    >>> recommend_funders(nonprofit_id=9981881)
    >>> recommend_funders(
    ...     nonprofit_id=9173173,
    ...     subject_codes_of_program='SS050000, SS000000,SB050000',
    ...     populations_served_codes_of_program='PJ050100',
    ...     geonameids_of_geographies_served='4094212,4094212'
    ... )

    Returns
    -------
    tuple[dict[str, Any], list[dict[str, Any]]] | str
        (Inferred data used to generate recommendations, array of funders being recommended)
        If output is a string then that means there was some error, and retry should be considered
    """

    payload: dict[str, Any] = {
        "candid_entity_id": nonprofit_id,
        "use_programs": True,
        "top_k": 5,
        "include_past_funders": include_past_funders
    }

    # Optional filters are only sent when they carry a value; `None` and empty strings are both skipped so
    # that no blank query parameter reaches the API (all three filters are treated the same way).
    if subject_codes_of_program:
        payload["subjects"] = subject_codes_of_program
    if populations_served_codes_of_program:
        payload["populations"] = populations_served_codes_of_program
    if geonameids_of_geographies_served:
        payload["geos"] = geonameids_of_geographies_served

    response = get_with_retries(
        url=FUNDER_RECOMMENDATION.endpoint("funder/pcs-v3"),
        payload=payload,
        headers={**FUNDER_RECOMMENDATION.header}
    )

    if response.status_code != 200:
        logger.error(
            "Error calling funder recommendations API %s. Error: %s",
            response.request.url,
            response.reason_phrase
        )
        return f"Error calling funder recommendations. Error: {response.reason_phrase}"

    data: dict = response.json()

    # Attach a Candid profile link to every recommended funder; a missing/null list yields no recommendations.
    recommendations = [
        {**funder, "profile_link": format_candid_profile_link(funder['funder_id'])}
        for funder in (data.get("recommendations") or [])
    ]
    return data.get("meta") or {}, recommendations
def recommend_funding_opportunities(
    nonprofit_id: int,
    subject_codes_of_program: str | None = None,
    populations_served_codes_of_program: str | None = None,
    geonameids_of_geographies_served: str | None = None
) -> tuple[dict[str, Any], list[dict[str, Any]]] | str:
    """Recommend active funding opportunities (RFPs) to a nonprofit seeking a grant.

    These recommendations are built using machine learning over a heterogeneous knowledge graph representing the work of
    the requesting organization, and the contextual recent activities of potential funders, and their grant recipients.

    While extra subject codes, populations served codes, and geography IDs for where the program takes place are not
    required, recommendations tend to improve and become more specific the more information can be provided.

    Subjects and populations can be determined using the `autocode` tool if the requester can supply a description of
    the program they are seeking funding for.

    Key Usage Requirements:
    - Always incorporate returned profile URLs directly into the response text
    - Replace funding organization name mentions with hyperlinked Candid profile URLs
    - Prioritize creating a seamless user experience by making URLs contextually relevant
    - Use inferred metadata to provide explanations about recommendation relevance

    Parameters
    ----------
    nonprofit_id : int
        The unique identifier of the requesting organization. This will need to be found from a search using inputs
        elicited from the requester
    subject_codes_of_program : str | None, optional
        Subject codes from Candid's PCS taxonomy, comma separated, by default None
    populations_served_codes_of_program : str | None, optional
        Population groups served codes from Candid's PCS taxonomy, comma separated, by default None
    geonameids_of_geographies_served : str | None, optional
        Geonames ID values for geographies served by the requester's program, comma separated, by default None

    Examples
    --------
    >>> recommend_funding_opportunities(nonprofit_id=9981881)
    >>> recommend_funding_opportunities(
    ...     nonprofit_id=9173173,
    ...     subject_codes_of_program='SS050000, SS000000,SB050000',
    ...     populations_served_codes_of_program='PJ050100',
    ...     geonameids_of_geographies_served='4094212,4094212'
    ... )

    Returns
    -------
    tuple[dict[str, Any], list[dict[str, Any]]] | str
        (Inferred data used to generate recommendations, array of active funding opportunities being recommended)
        If output is a string then that means there was some error, and retry should be considered
    """

    payload: dict[str, Any] = {"candid_entity_id": nonprofit_id, "use_programs": True, "top_k": 5}

    # Optional filters are only sent when they carry a value; `None` and empty strings are both skipped so
    # that no blank query parameter reaches the API (all three filters are treated the same way).
    if subject_codes_of_program:
        payload["subjects"] = subject_codes_of_program
    if populations_served_codes_of_program:
        payload["populations"] = populations_served_codes_of_program
    if geonameids_of_geographies_served:
        payload["geos"] = geonameids_of_geographies_served

    response = get_with_retries(
        url=FUNDER_RECOMMENDATION.endpoint("rfp/pcs-v3"),
        payload=payload,
        headers={**FUNDER_RECOMMENDATION.header}
    )

    if response.status_code != 200:
        logger.error(
            "Error calling RFP recommendation API %s. Error: %s",
            response.request.url,
            response.reason_phrase
        )
        return f"Error calling RFP recommendations. Error: {response.reason_phrase}"

    data: dict = response.json()

    # Attach the funder's Candid profile link to every recommended opportunity; a missing/null list yields
    # no recommendations.
    recommendations = [
        {**opportunity, "profile_link": format_candid_profile_link(opportunity['funder_id'])}
        for opportunity in (data.get("recommendations") or [])
    ]
    return data.get("meta") or {}, recommendations