Spaces:

CandidAI
/

ask-candid

Running

App Files Files Community

ask-candid / ask_candid /tools /recommendations.py

brainsqueeze

v3 (#2)

f5c9c80 verified 2 months ago

raw

history blame contribute delete

15.8 kB

	from typing import TypedDict, Literal, Annotated, Any
	import logging

	from langchain_core.tools import tool
	import httpx

	from ask_candid.tools.utils import format_candid_profile_link
	from ask_candid.base.utils import retry_on_status
	from ask_candid.base.config.rest import FUNDER_RECOMMENDATION, SEARCH

	# Configure the root logger's output format at import time. `basicConfig` is a
	# no-op when the root logger already has handlers, so an application that set up
	# logging earlier keeps its own format.
	logging.basicConfig(format="[%(levelname)s] (%(asctime)s) :: %(message)s")
	logger = logging.getLogger(__name__)
	# This module only reports failed API calls, so anything below ERROR is filtered out.
	logger.setLevel(logging.ERROR)


	class OrganizationRecord(TypedDict):
	nonprofit_id: Annotated[str, "Unique Candid ID value for the organization"]
	name: Annotated[str, "Name of the organization"]
	aka_name: Annotated[str, "'Also-known-as' name of the organization"]
	acronym: Annotated[str, "Acronym of the name of the organization"]
	city: Annotated[str, "City that the organization is located in"]
	admin1: Annotated[str, "State, province, or canton that the organization is located in"]
	country: Annotated[str, "Country that the organization is located in"]
	ein: Annotated[str, "IRS employer identification number (EIN) of the organization, only relevant for US-based orgs"]
	profile_link: Annotated[str, "Link to the Candid profile for the organization"]
	working_on: Annotated[str, "Description of the subject purpose of the organization"]
	serving: Annotated[str, "Description of the population groups served by the organization"]
	transparency_level: Annotated[str, "Candid Seal level of the organization indicating transparency level"]
	organization_roles: Annotated[str, "Roles of the organization (eg. grantmaker, recipient)"]
	grants_awarded: Annotated[str, "Summary stats of the grants awarded by the organization"]
	grants_received: Annotated[str, "Summary stats of the grants received by the organization"]


	@retry_on_status(num_retries=3)
	def get_with_retries(url: str, payload: dict[str, Any] | None, headers: dict[str, str] | None) -> httpx.Response:
	    """Send a single HTTP GET request through a short-lived client.

	    Parameters
	    ----------
	    url : str
	        Fully qualified endpoint to call
	    payload : dict[str, Any] | None
	        Query-string parameters for the request, if any
	    headers : dict[str, str] | None
	        Request headers, if any

	    Returns
	    -------
	    httpx.Response
	        Response from the GET call. The `retry_on_status` decorator wraps this function and is defined elsewhere;
	        presumably it re-invokes the call on certain status codes (up to 3 times) -- confirm in
	        `ask_candid.base.utils`.
	    """

	    # The transport is built with `retries=3` in addition to the decorator's own retry count.
	    transport = httpx.HTTPTransport(retries=3)
	    with httpx.Client(transport=transport, timeout=30) as session:
	        response = session.get(url=url, params=payload, headers=headers)
	    return response


	@tool
	def organization_search(
	    query: str,
	    located_postal_code: str | None = None,
	    located_admin1: str | None = None,
	    search_mode: Literal["organization_only", "organization_and_grants"] | None = "organization_only"
	) -> list[OrganizationRecord] | str:
	    """Search for organizations by name, description or work, program descriptions and locations. Here are some
	    guidelines:
	    * `query` controls hybrid searching involving both vector search and keyword search
	    * `query` can be used to find organizations based on a description of work
	    * if the query is intended to be a lookup of an organization by name, then adding quotes around the `query` string
	    circumvents vector search, and prioritizes keyword matching on names (eg. `query=Candid` --> `query='Candid'`)
	    * if the query is an EIN (eg. 12-3456789) then keyword searching is prioritized to get exact matches
	    * adding location information such as postal codes and/or admin1 (state/province abbreviations) will filter results

	    This tool should be used as a first step in any downstream task which requires identifying the nonprofit that the
	    user is identifying with. Often, the `nonprofit_id` is required, and that can be found via a search.

	    Parameters
	    ----------
	    query : str
	        Free text query which drives the search functionality. This uses a hybrid approach of vector and keyword
	        searching, but under certain conditions expressed in the 'guidelines' this may disable vector search.
	    located_postal_code : str | None, optional
	        Postal code of the organization to be searched, if provided, by default None
	    located_admin1 : str | None, optional
	        Admin1 code (state/province abbreviation) of the organization to be searched, if provided, by default None
	    search_mode : Literal["organization_only", "organization_and_grants"] | None, optional
	        Choose how to search for organizations, if `None` or "organization_and_grants" then this will examine evidence
	        at the organization level as well as at the historical grant transaction level capturing activity evidence. For
	        name lookups it is best to use the "organization_only" default value, by default "organization_only"

	    Returns
	    -------
	    list[OrganizationRecord] | str
	        List of the top organization search results
	        If output is a string then that means there was some error, and retry should be considered
	    """

	    payload = {"query": query, "searchMode": search_mode, "rowCount": 5}
	    if located_postal_code is not None:
	        payload["postalCode"] = located_postal_code
	    if located_admin1 is not None:
	        payload["admin1"] = located_admin1

	    with httpx.Client(transport=httpx.HTTPTransport(retries=3), timeout=30) as client:
	        r = client.get(
	            url=SEARCH.endpoint("v1/search"),
	            params=payload,
	            headers={**SEARCH.header} # type: ignore
	        )

	    # Non-200 responses are reported back as a string so the calling agent can decide whether to retry.
	    if r.status_code != 200:
	        logger.error("Error calling organization search API %s. Error: %s", str(r.request.url), r.reason_phrase)
	        return f"Error calling organization search. Error: {r.reason_phrase}"

	    data: dict = r.json()

	    output = []
	    for org in data.get("returnedOrgs") or []:
	        # Nested objects are read with `or {}` / `or []` so that one record with a null or missing
	        # section does not abort the whole result set.
	        working_on, serving = [], []
	        for code, description in (org.get("taxonomy") or {}).items():
	            # 'P' codes longer than two characters feed `serving`; 'S' codes feed `working_on`.
	            # Presumably these are PCS population/subject codes and the two-character 'P' codes are
	            # top-level buckets that are too broad to be useful -- confirm against the search API.
	            if code.startswith('P') and len(code) > 2:
	                serving.append(description.lower())
	            elif code.startswith('S'):
	                working_on.append(description.lower())

	        grants_given = org.get("transactionsGiven") or {}
	        grants_received = org.get("transactionsReceived") or {}

	        output.append(OrganizationRecord(
	            nonprofit_id=org["candidEntityID"],
	            name=org["orgName"],
	            aka_name=org["akaName"],
	            acronym=org["acronymName"],
	            city=org["city"],
	            admin1=org["admin1"],
	            country=org["countryName"],
	            ein=org["ein"],
	            profile_link=format_candid_profile_link(org['candidEntityID']),
	            working_on=f"Working on {', '.join(working_on)}",
	            serving=f"Serving population groups {', '.join(serving)}",
	            transparency_level=(org.get("seal") or {}).get("description"),
	            organization_roles=', '.join(org.get("roles") or []),
	            grants_awarded=', '.join(f"{k}: {v}" for k, v in grants_given.items()),
	            grants_received=', '.join(f"{k}: {v}" for k, v in grants_received.items())
	        ))
	    return output


	@tool
	def recommend_funders(
	    nonprofit_id: int,
	    subject_codes_of_program: str | None = None,
	    populations_served_codes_of_program: str | None = None,
	    geonameids_of_geographies_served: str | None = None,
	    include_past_funders: bool = False
	) -> tuple[dict[str, Any], list[dict[str, Any]]] | str:
	    """Recommend potential funding organizations to a nonprofit seeking a grant.

	    These recommendations are built using machine learning over a heterogeneous knowledge graph representing the work of
	    the requesting organization, and the contextual recent activities of potential funders, and their grant recipients.

	    While extra subject codes, populations served codes, and geography IDs for where the program takes place is not
	    required, recommendations tend to improve and become more specific the more information can be provided.

	    Subjects and populations can be determined using the `autocode` tool if the requester can supply a description of
	    the program they are seeking funding for.

	    Geographies can be determined using the geo detection tool if the requester can supply a description of the program
	    they are seeking funding for.

	    Key Usage Requirements:
	    - Always incorporate returned profile URLs directly into the response text
	    - Replace funding organization name mentions with hyperlinked Candid profile URLs
	    - Prioritize creating a seamless user experience by making URLs contextually relevant
	    - Use relevant recipient data as well as inferred metadata to provide explanations about recommendation relevance

	    Parameters
	    ----------
	    nonprofit_id : int
	        The unique identifier of the requesting organization. This will need to be found from a search using inputs
	        elicited from the requester
	    subject_codes_of_program : str | None, optional
	        Subject codes from Candid's PCS taxonomy, comma separated, by default None
	    populations_served_codes_of_program : str | None, optional
	        Population groups served codes from Candid's PCS taxonomy, comma separated, by default None
	    geonameids_of_geographies_served : str | None, optional
	        Geonames ID values for geographies served by the requester's program, comma separated, by default None
	    include_past_funders : bool, optional
	        Boolean flag to indicate whether previous funders of the input organization identified by the `nonprofit_id`
	        should be included. If the requester would like to reconsider previous funding organizations then set this to
	        `True`, but the requester MUST be prompted to indicate this preference. Using the default value will help the
	        requester discover new, potentially relevant funders, by default False

	    Examples
	    --------
	    >>> recommend_funders(nonprofit_id=9981881)
	    >>> recommend_funders(
	        nonprofit_id=9173173,
	        subject_codes_of_program='SS050000, SS000000,SB050000',
	        populations_served_codes_of_program='PJ050100',
	        geonameids_of_geographies_served='4094212,4094212'
	    )

	    Returns
	    -------
	    tuple[dict[str, Any], list[dict[str, Any]]] | str
	        (Inferred data used to generate recommendations, array of funders being recommended)
	        If output is a string then that means there was some error, and retry should be considered
	    """

	    payload: dict[str, Any] = {
	        "candid_entity_id": nonprofit_id,
	        "use_programs": True,
	        "top_k": 5,
	        "include_past_funders": include_past_funders
	    }

	    # Optional filters are only forwarded when they carry a value; an empty string is treated the same
	    # as `None` for all three so that a blank filter is never sent to the API.
	    if subject_codes_of_program:
	        payload["subjects"] = subject_codes_of_program
	    if populations_served_codes_of_program:
	        payload["populations"] = populations_served_codes_of_program
	    if geonameids_of_geographies_served:
	        payload["geos"] = geonameids_of_geographies_served

	    r = get_with_retries(
	        url=FUNDER_RECOMMENDATION.endpoint("funder/pcs-v3"),
	        payload=payload,
	        headers={**FUNDER_RECOMMENDATION.header}
	    )
	    # Narrows the type for static checkers; the retry decorator wrapping `get_with_retries` is defined elsewhere.
	    assert isinstance(r, httpx.Response)
	    if r.status_code != 200:
	        logger.error("Error calling funder recommendations API %s. Error: %s", str(r.request.url), r.reason_phrase)
	        return f"Error calling funder recommendations. Error: {r.reason_phrase}"

	    data: dict = r.json()
	    # Each recommendation is passed through unchanged, plus a Candid profile link built from its `funder_id`.
	    return (
	        data.get("meta") or {},
	        [{
	            **rec,
	            "profile_link": format_candid_profile_link(rec['funder_id'])
	        } for rec in (data.get("recommendations") or [])]
	    )


	@tool
	def recommend_funding_opportunities(
	    nonprofit_id: int,
	    subject_codes_of_program: str | None = None,
	    populations_served_codes_of_program: str | None = None,
	    geonameids_of_geographies_served: str | None = None
	) -> tuple[dict[str, Any], list[dict[str, Any]]] | str:
	    """Recommend active funding opportunities (RFPs) to a nonprofit seeking a grant.

	    These recommendations are built using machine learning over a heterogeneous knowledge graph representing the work of
	    the requesting organization, and the contextual recent activities of potential funders, and their grant recipients.

	    While extra subject codes, populations served codes, and geography IDs for where the program takes place is not
	    required, recommendations tend to improve and become more specific the more information can be provided.

	    Subjects and populations can be determined using the `autocode` tool if the requester can supply a description of
	    the program they are seeking funding for.

	    Key Usage Requirements:
	    - Always incorporate returned profile URLs directly into the response text
	    - Replace funding organization name mentions with hyperlinked Candid profile URLs
	    - Prioritize creating a seamless user experience by making URLs contextually relevant
	    - Use inferred metadata to provide explanations about recommendation relevance

	    Parameters
	    ----------
	    nonprofit_id : int
	        The unique identifier of the requesting organization. This will need to be found from a search using inputs
	        elicited from the requester
	    subject_codes_of_program : str | None, optional
	        Subject codes from Candid's PCS taxonomy, comma separated, by default None
	    populations_served_codes_of_program : str | None, optional
	        Population groups served codes from Candid's PCS taxonomy, comma separated, by default None
	    geonameids_of_geographies_served : str | None, optional
	        Geonames ID values for geographies served by the requester's program, comma separated, by default None

	    Examples
	    --------
	    >>> recommend_funding_opportunities(nonprofit_id=9981881)
	    >>> recommend_funding_opportunities(
	        nonprofit_id=9173173,
	        subject_codes_of_program='SS050000, SS000000,SB050000',
	        populations_served_codes_of_program='PJ050100',
	        geonameids_of_geographies_served='4094212,4094212'
	    )

	    Returns
	    -------
	    tuple[dict[str, Any], list[dict[str, Any]]] | str
	        (Inferred data used to generate recommendations, array of active funding opportunities being recommended)
	        If output is a string then that means there was some error, and retry should be considered
	    """

	    payload: dict[str, Any] = {"candid_entity_id": nonprofit_id, "use_programs": True, "top_k": 5}

	    # Optional filters are only forwarded when they carry a value; an empty string is treated the same
	    # as `None` for all three so that a blank filter is never sent to the API.
	    if subject_codes_of_program:
	        payload["subjects"] = subject_codes_of_program
	    if populations_served_codes_of_program:
	        payload["populations"] = populations_served_codes_of_program
	    if geonameids_of_geographies_served:
	        payload["geos"] = geonameids_of_geographies_served

	    r = get_with_retries(
	        url=FUNDER_RECOMMENDATION.endpoint("rfp/pcs-v3"),
	        payload=payload,
	        headers={**FUNDER_RECOMMENDATION.header}
	    )
	    # Narrows the type for static checkers; the retry decorator wrapping `get_with_retries` is defined elsewhere.
	    assert isinstance(r, httpx.Response)
	    if r.status_code != 200:
	        logger.error("Error calling RFP recommendation API %s. Error: %s", str(r.request.url), r.reason_phrase)
	        return f"Error calling RFP recommendations. Error: {r.reason_phrase}"

	    data: dict = r.json()
	    # Each recommendation is passed through unchanged, plus a Candid profile link built from its `funder_id`.
	    return (
	        data.get("meta") or {},
	        [{
	            **rec,
	            "profile_link": format_candid_profile_link(rec['funder_id'])
	        } for rec in (data.get("recommendations") or [])]
	    )