ask-candid / ask_candid /tools /recommendations.py
brainsqueeze's picture
v3 (#2)
f5c9c80 verified
from typing import TypedDict, Literal, Annotated, Any
import logging
from langchain_core.tools import tool
import httpx
from ask_candid.tools.utils import format_candid_profile_link
from ask_candid.base.utils import retry_on_status
from ask_candid.base.config.rest import FUNDER_RECOMMENDATION, SEARCH
# Set the root logger's message format: "[LEVEL] (timestamp) :: message".
logging.basicConfig(format="[%(levelname)s] (%(asctime)s) :: %(message)s")
# Module-scoped logger; only ERROR and above are emitted from this module.
logger = logging.getLogger(__name__)
logger.setLevel(logging.ERROR)
class OrganizationRecord(TypedDict):
    """Flattened summary of a single organization, as returned by `organization_search`.

    Every field carries its human-readable description as `Annotated` metadata.
    """
    nonprofit_id: Annotated[str, "Unique Candid ID value for the organization"]
    name: Annotated[str, "Name of the organization"]
    aka_name: Annotated[str, "'Also-known-as' name of the organization"]
    acronym: Annotated[str, "Acronym of the name of the organization"]
    city: Annotated[str, "City that the organization is located in"]
    admin1: Annotated[str, "State, province, or canton that the organization is located in"]
    country: Annotated[str, "Country that the organization is located in"]
    ein: Annotated[str, "IRS employer identification number (EIN) of the organization, only relevant for US-based orgs"]
    profile_link: Annotated[str, "Link to the Candid profile for the organization"]
    # Pre-formatted sentence fragments built from the organization's taxonomy codes.
    working_on: Annotated[str, "Description of the subject purpose of the organization"]
    serving: Annotated[str, "Description of the population groups served by the organization"]
    # NOTE(review): `organization_search` fills this from a `.get("description")` lookup on the seal, so it can be
    # `None` at runtime even though it is annotated as `str` -- confirm whether `str | None` is intended.
    transparency_level: Annotated[str, "Candid Seal level of the organization indicating transparency level"]
    # Comma-joined list of role names.
    organization_roles: Annotated[str, "Roles of the organization (eg. grantmaker, recipient)"]
    # Comma-joined "key: value" pairs of the summary statistics.
    grants_awarded: Annotated[str, "Summary stats of the grants awarded by the organization"]
    grants_received: Annotated[str, "Summary stats of the grants received by the organization"]
@retry_on_status(num_retries=3)
def get_with_retries(url: str, payload: dict[str, Any] | None, headers: dict[str, str] | None) -> httpx.Response:
    """Send a single HTTP GET through a short-lived `httpx` client.

    The client is created per call with a 30 second timeout and a transport configured with `retries=3`. The whole
    call is additionally wrapped by `retry_on_status(num_retries=3)`.
    NOTE(review): `retry_on_status` is defined in `ask_candid.base.utils`; presumably it re-invokes this function when
    the response status warrants a retry -- confirm against that helper.

    Parameters
    ----------
    url : str
        Fully-qualified URL to request
    payload : dict[str, Any] | None
        Query-string parameters, passed to `httpx` as `params`
    headers : dict[str, str] | None
        Request headers

    Returns
    -------
    httpx.Response
        The response exactly as returned by the client; status codes are not inspected here
    """
    retrying_transport = httpx.HTTPTransport(retries=3)
    with httpx.Client(transport=retrying_transport, timeout=30) as session:
        response = session.get(url=url, params=payload, headers=headers)
        return response
@tool
def organization_search(
    query: str,
    located_postal_code: str | None = None,
    located_admin1: str | None = None,
    search_mode: Literal["organization_only", "organization_and_grants"] | None = "organization_only"
) -> list[OrganizationRecord] | str:
    """Search for organizations by name, description or work, program descriptions and locations. Here are some
    guidelines:
    * `query` controls hybrid searching involving both vector search and keyword search
    * `query` can be used to find organizations based on a description of work
    * if the query is intended to be a lookup of an organization by name, then adding quotes around the `query` string
    circumvents vector search, and prioritizes keyword matching on names (eg. `query=Candid` --> `query='Candid'`)
    * if the query is an EIN (eg. 12-3456789) then keyword searching is prioritized to get exact matches
    * adding location information such as postal codes and/or admin1 (state/province abbreviations) will filter results

    This tool should be used as a first step in any downstream task which requires identifying the nonprofit that the
    user is identifying with. Often, the `nonprofit_id` is required, and that can be found via a search.

    Parameters
    ----------
    query : str
        Free text query which drives the search functionality. This uses a hybrid approach of vector and keyword
        searching, but under certain conditions expressed in the 'guidelines' this may disable vector search.
    located_postal_code : str | None, optional
        Postal code of the organization to be searched, if provided, by default None
    located_admin1 : str | None, optional
        Admin1 code (state/province abbreviation) of the organization to be searched, if provided, by default None
    search_mode : Literal["organization_only", "organization_and_grants"] | None, optional
        Choose how to search for organizations, if `None` or "organization_and_grants" then this will examine evidence
        at the organization level as well as at the historical grant transaction level capturing activity evidence. For
        name lookups it is best to use the "organization_only" default value, by default "organization_only"

    Returns
    -------
    list[OrganizationRecord] | str
        List of the top organization search results
        If output is a string then that means there was some error, and retry should be considered
    """
    # NOTE: the docstring above is kept verbatim on purpose -- `@tool` exposes it to the model as the tool description.

    # NOTE(review): when `search_mode` is None, httpx serialises the value as an empty `searchMode=` parameter rather
    # than dropping it -- confirm the search API treats that the same as an omitted parameter.
    payload: dict[str, Any] = {"query": query, "searchMode": search_mode, "rowCount": 5}
    # Location filters are only sent when the caller supplied them.
    if located_postal_code is not None:
        payload["postalCode"] = located_postal_code
    if located_admin1 is not None:
        payload["admin1"] = located_admin1

    with httpx.Client(transport=httpx.HTTPTransport(retries=3), timeout=30) as client:
        r = client.get(
            url=SEARCH.endpoint("v1/search"),
            params=payload,
            headers={**SEARCH.header} # type: ignore
        )

    if r.status_code != 200:
        # Errors are reported back as a plain string so the calling agent can decide whether to retry.
        logger.error("Error calling organization search API %s. Error: %s", str(r.request.url), r.reason_phrase)
        return f"Error calling organization search. Error: {r.reason_phrase}"

    data: dict = r.json()
    # `returnedOrgs` may be absent or null; both are treated as "no results".
    return [_build_organization_record(org) for org in (data.get("returnedOrgs") or [])]


def _build_organization_record(org: dict[str, Any]) -> OrganizationRecord:
    """Flatten one raw search hit into an `OrganizationRecord`.

    Nested containers (`taxonomy`, `seal`, `roles`, `transactionsGiven`, `transactionsReceived`) are treated as empty
    when they are missing or null, so a single sparse hit does not abort the whole search. Scalar fields are still
    read with direct indexing, so a missing scalar key raises `KeyError` as before.
    """
    taxonomy: dict[str, str] = org.get("taxonomy") or {}
    working_on: list[str] = []
    serving: list[str] = []
    for code, description in taxonomy.items():
        # Codes starting with 'P' feed the populations-served text and codes starting with 'S' feed the subject text.
        # 'P' codes of one or two characters are skipped (presumably too coarse to be useful -- confirm).
        if code.startswith('P') and len(code) > 2:
            serving.append(description.lower())
        elif code.startswith('S'):
            working_on.append(description.lower())

    seal: dict[str, Any] = org.get("seal") or {}
    grants_given: dict[str, Any] = org.get("transactionsGiven") or {}
    grants_taken: dict[str, Any] = org.get("transactionsReceived") or {}

    return OrganizationRecord(
        nonprofit_id=org["candidEntityID"],
        name=org["orgName"],
        aka_name=org["akaName"],
        acronym=org["acronymName"],
        city=org["city"],
        admin1=org["admin1"],
        country=org["countryName"],
        ein=org["ein"],
        profile_link=format_candid_profile_link(org['candidEntityID']),
        working_on=f"Working on {', '.join(working_on)}",
        serving=f"Serving population groups {', '.join(serving)}",
        transparency_level=seal.get("description"),
        organization_roles=', '.join(org.get("roles") or []),
        grants_awarded=', '.join(f"{k}: {v}" for k, v in grants_given.items()),
        grants_received=', '.join(f"{k}: {v}" for k, v in grants_taken.items())
    )
@tool
def recommend_funders(
    nonprofit_id: int,
    subject_codes_of_program: str | None = None,
    populations_served_codes_of_program: str | None = None,
    geonameids_of_geographies_served: str | None = None,
    include_past_funders: bool = False
) -> tuple[dict[str, Any], list[dict[str, Any]]] | str:
    """Recommend potential funding organizations to a nonprofit seeking a grant.

    These recommendations are built using machine learning over a heterogeneous knowledge graph representing the work of
    the requesting organization, and the contextual recent activities of potential funders, and their grant recipients.

    While extra subject codes, populations served codes, and geography IDs for where the program takes place is not
    required, recommendations tend to improve and become more specific the more information can be provided.

    Subjects and populations can be determined using the `autocode` tool if the requester can supply a description of
    the program they are seeking funding for.

    Geographies can be determined using the geo detection tool if the requester can supply a description of the program
    they are seeking funding for.

    Key Usage Requirements:
    - Always incorporate returned profile URLs directly into the response text
    - Replace funding organization name mentions with hyperlinked Candid profile URLs
    - Prioritize creating a seamless user experience by making URLs contextually relevant
    - Use relevant recipient data as well as inferred metadata to provide explanations about recommendation relevance

    Parameters
    ----------
    nonprofit_id : int
        The unique identifier of the requesting organization. This will need to be found from a search using inputs
        elicited from the requester
    subject_codes_of_program : str | None, optional
        Subject codes from Candid's PCS taxonomy, comma separated, by default None
    populations_served_codes_of_program : str | None, optional
        Population groups served codes from Candid's PCS taxonomy, comma separated, by default None
    geonameids_of_geographies_served : str | None, optional
        Geonames ID values for geographies served by the requester's program, comma separated, by default None
    include_past_funders : bool, optional
        Boolean flag to indicate whether previous funders of the input organization identified by the `nonprofit_id`
        should be included. If the requester would like to reconsider previous funding organizations then set this to
        `True`, but the requester MUST be prompted to indicate this preference. Using the default value will help the
        requester discover new, potentially relevant funders, by default False

    Examples
    --------
    >>> recommend_funders(nonprofit_id=9981881)
    >>> recommend_funders(
        nonprofit_id=9173173,
        subject_codes_of_program='SS050000, SS000000,SB050000',
        populations_served_codes_of_program='PJ050100',
        geonameids_of_geographies_served='4094212,4094212'
    )

    Returns
    -------
    tuple[dict[str, Any], list[dict[str, Any]]] | str
        (Inferred data used to generate recommendations, array of funders being recommended)
        If output is a string then that means there was some error, and retry should be considered
    """
    # NOTE: `@tool` exposes the docstring above to the model as the tool description; edit it with care.
    payload: dict[str, Any] = {
        "candid_entity_id": nonprofit_id,
        "use_programs": True,
        "top_k": 5,
        "include_past_funders": include_past_funders
    }
    # Optional filters are sent only when they carry a value. All three are treated the same way, so an empty string
    # is omitted instead of being forwarded as an empty query parameter.
    optional_filters = {
        "subjects": subject_codes_of_program,
        "populations": populations_served_codes_of_program,
        "geos": geonameids_of_geographies_served,
    }
    payload.update({name: value for name, value in optional_filters.items() if value})

    response = get_with_retries(
        url=FUNDER_RECOMMENDATION.endpoint("funder/pcs-v3"),
        payload=payload,
        headers={**FUNDER_RECOMMENDATION.header}
    )
    # Narrows the decorated helper's return value to a concrete response for type checkers.
    assert isinstance(response, httpx.Response)

    if response.status_code != 200:
        # Errors are reported back as a plain string so the calling agent can decide whether to retry.
        logger.error(
            "Error calling funder recommendations API %s. Error: %s",
            str(response.request.url),
            response.reason_phrase
        )
        return f"Error calling funder recommendations. Error: {response.reason_phrase}"

    data: dict = response.json()
    # Each recommendation is passed through unchanged, plus a ready-to-use Candid profile link for the funder.
    return (
        data.get("meta") or {},
        [{
            **recommendation,
            "profile_link": format_candid_profile_link(recommendation['funder_id'])
        } for recommendation in (data.get("recommendations") or [])]
    )
@tool
def recommend_funding_opportunities(
    nonprofit_id: int,
    subject_codes_of_program: str | None = None,
    populations_served_codes_of_program: str | None = None,
    geonameids_of_geographies_served: str | None = None
) -> tuple[dict[str, Any], list[dict[str, Any]]] | str:
    """Recommend active funding opportunities (RFPs) to a nonprofit seeking a grant.

    These recommendations are built using machine learning over a heterogeneous knowledge graph representing the work of
    the requesting organization, and the contextual recent activities of potential funders, and their grant recipients.

    While extra subject codes, populations served codes, and geography IDs for where the program takes place is not
    required, recommendations tend to improve and become more specific the more information can be provided.

    Subjects and populations can be determined using the `autocode` tool if the requester can supply a description of
    the program they are seeking funding for.

    Key Usage Requirements:
    - Always incorporate returned profile URLs directly into the response text
    - Replace funding organization name mentions with hyperlinked Candid profile URLs
    - Prioritize creating a seamless user experience by making URLs contextually relevant
    - Use inferred metadata to provide explanations about recommendation relevance

    Parameters
    ----------
    nonprofit_id : int
        The unique identifier of the requesting organization. This will need to be found from a search using inputs
        elicited from the requester
    subject_codes_of_program : str | None, optional
        Subject codes from Candid's PCS taxonomy, comma separated, by default None
    populations_served_codes_of_program : str | None, optional
        Population groups served codes from Candid's PCS taxonomy, comma separated, by default None
    geonameids_of_geographies_served : str | None, optional
        Geonames ID values for geographies served by the requester's program, comma separated, by default None

    Examples
    --------
    >>> recommend_funding_opportunities(nonprofit_id=9981881)
    >>> recommend_funding_opportunities(
        nonprofit_id=9173173,
        subject_codes_of_program='SS050000, SS000000,SB050000',
        populations_served_codes_of_program='PJ050100',
        geonameids_of_geographies_served='4094212,4094212'
    )

    Returns
    -------
    tuple[dict[str, Any], list[dict[str, Any]]] | str
        (Inferred data used to generate recommendations, array of active funding opportunities being recommended)
        If output is a string then that means there was some error, and retry should be considered
    """
    # NOTE: `@tool` exposes the docstring above to the model as the tool description; edit it with care.
    payload: dict[str, Any] = {"candid_entity_id": nonprofit_id, "use_programs": True, "top_k": 5}
    # Optional filters are sent only when they carry a value. All three are treated the same way, so an empty string
    # is omitted instead of being forwarded as an empty query parameter.
    optional_filters = {
        "subjects": subject_codes_of_program,
        "populations": populations_served_codes_of_program,
        "geos": geonameids_of_geographies_served,
    }
    payload.update({name: value for name, value in optional_filters.items() if value})

    response = get_with_retries(
        url=FUNDER_RECOMMENDATION.endpoint("rfp/pcs-v3"),
        payload=payload,
        headers={**FUNDER_RECOMMENDATION.header}
    )
    # Narrows the decorated helper's return value to a concrete response for type checkers.
    assert isinstance(response, httpx.Response)

    if response.status_code != 200:
        # Errors are reported back as a plain string so the calling agent can decide whether to retry.
        logger.error(
            "Error calling RFP recommendation API %s. Error: %s",
            str(response.request.url),
            response.reason_phrase
        )
        return f"Error calling RFP recommendations. Error: {response.reason_phrase}"

    data: dict = response.json()
    # Each recommendation is passed through unchanged, plus a ready-to-use Candid profile link built from its
    # `funder_id`.
    return (
        data.get("meta") or {},
        [{
            **recommendation,
            "profile_link": format_candid_profile_link(recommendation['funder_id'])
        } for recommendation in (data.get("recommendations") or [])]
    )