File size: 15,797 Bytes
f5c9c80
68e9b80
 
 
 
 
 
 
 
 
 
 
 
 
 
f5c9c80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68e9b80
 
 
 
 
 
 
 
 
 
 
 
f5c9c80
68e9b80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f5c9c80
68e9b80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f5c9c80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68e9b80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f5c9c80
68e9b80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f5c9c80
68e9b80
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
from typing import TypedDict, Literal, Annotated, Any
import logging

from langchain_core.tools import tool
import httpx

from ask_candid.tools.utils import format_candid_profile_link
from ask_candid.base.utils import retry_on_status
from ask_candid.base.config.rest import FUNDER_RECOMMENDATION, SEARCH

# Configure the root logger's output layout as "[LEVEL] (timestamp) :: message".
# `basicConfig` only takes effect when the root logger has no handlers yet.
logging.basicConfig(format="[%(levelname)s] (%(asctime)s) :: %(message)s")
# Module-scoped logger used by the tools below to report failed API calls.
logger = logging.getLogger(__name__)
# Only ERROR and above are emitted through this module's logger.
logger.setLevel(logging.ERROR)


class OrganizationRecord(TypedDict):
    """Flattened description of one organization, as returned by `organization_search`.

    Every field is declared as a string and carries its human-readable description as `Annotated` metadata.
    `organization_search` builds one record per entry of the search response's `returnedOrgs` array.
    """

    # Identity
    nonprofit_id: Annotated[str, "Unique Candid ID value for the organization"]
    name: Annotated[str, "Name of the organization"]
    aka_name: Annotated[str, "'Also-known-as' name of the organization"]
    acronym: Annotated[str, "Acronym of the name of the organization"]
    # Location
    city: Annotated[str, "City that the organization is located in"]
    admin1: Annotated[str, "State, province, or canton that the organization is located in"]
    country: Annotated[str, "Country that the organization is located in"]
    ein: Annotated[str, "IRS employer identification number (EIN) of the organization, only relevant for US-based orgs"]
    # Built by `format_candid_profile_link` from the organization's Candid ID
    profile_link: Annotated[str, "Link to the Candid profile for the organization"]
    # Free-text summaries assembled from the organization's taxonomy codes
    working_on: Annotated[str, "Description of the subject purpose of the organization"]
    serving: Annotated[str, "Description of the population groups served by the organization"]
    # NOTE(review): populated from `seal.get("description")`, so this can be None despite the `str` annotation
    transparency_level: Annotated[str, "Candid Seal level of the organization indicating transparency level"]
    # Comma-joined list of role names
    organization_roles: Annotated[str, "Roles of the organization (eg. grantmaker, recipient)"]
    # Comma-joined "key: value" pairs taken from the transaction summary mappings
    grants_awarded: Annotated[str, "Summary stats of the grants awarded by the organization"]
    grants_received: Annotated[str, "Summary stats of the grants received by the organization"]


@retry_on_status(num_retries=3)
def get_with_retries(url: str, payload: dict[str, Any] | None, headers: dict[str, str] | None) -> httpx.Response:
    """Issue one HTTP GET request through a short-lived `httpx` client.

    The client uses a transport configured with `retries=3` and a 30 second timeout, and is closed before returning.
    The function is additionally wrapped in `retry_on_status(num_retries=3)`; presumably that re-issues the request
    for certain response status codes -- confirm against `ask_candid.base.utils`.

    Parameters
    ----------
    url : str
        Address to request
    payload : dict[str, Any] | None
        Values sent as URL query parameters
    headers : dict[str, str] | None
        Headers sent with the request

    Returns
    -------
    httpx.Response
        The response to the GET request
    """

    transport = httpx.HTTPTransport(retries=3)
    with httpx.Client(transport=transport, timeout=30) as session:
        response = session.get(url=url, params=payload, headers=headers)
    return response


def _build_organization_record(org: dict[str, Any]) -> OrganizationRecord:
    """Flatten one raw entry of the search response's `returnedOrgs` array into an `OrganizationRecord`.

    Nested containers (`taxonomy`, `seal`, `roles`, `transactionsGiven`, `transactionsReceived`) are treated as empty
    when they are null or absent, so a sparse record does not abort the whole search.
    NOTE(review): scalar keys are still read with `org[...]` and are assumed to be present -- confirm against the API.
    """

    working_on: list[str] = []
    serving: list[str] = []
    for code, description in (org.get("taxonomy") or {}).items():
        # Codes starting with 'P' feed the "serving" summary and codes starting with 'S' the "working on" summary.
        # 'P' codes of two characters or fewer are skipped (presumably top-level buckets -- TODO confirm).
        if code.startswith('P') and len(code) > 2:
            serving.append(description.lower())
        elif code.startswith('S'):
            working_on.append(description.lower())

    grants_given = org.get("transactionsGiven") or {}
    grants_received = org.get("transactionsReceived") or {}

    return OrganizationRecord(
        nonprofit_id=org["candidEntityID"],
        name=org["orgName"],
        aka_name=org["akaName"],
        acronym=org["acronymName"],
        city=org["city"],
        admin1=org["admin1"],
        country=org["countryName"],
        ein=org["ein"],
        profile_link=format_candid_profile_link(org['candidEntityID']),
        working_on=f"Working on {', '.join(working_on)}",
        serving=f"Serving population groups {', '.join(serving)}",
        transparency_level=(org.get("seal") or {}).get("description"),
        organization_roles=', '.join(org.get("roles") or []),
        grants_awarded=', '.join([f"{k}: {v}" for k, v in grants_given.items()]),
        grants_received=', '.join([f"{k}: {v}" for k, v in grants_received.items()])
    )


# NOTE: `@tool` exposes the docstring below to the model as the tool description, so its wording is kept verbatim.
@tool
def organization_search(
    query: str,
    located_postal_code: str | None = None,
    located_admin1: str | None = None,
    search_mode: Literal["organization_only", "organization_and_grants"] | None = "organization_only"
) -> list[OrganizationRecord] | str:
    """Search for organizations by name, description or work, program descriptions and locations. Here are some
    guidelines:
    * `query` controls hybrid searching involving both vector search and keyword search
    * `query` can be used to find organizations based on a description of work
    * if the query is intended to be a lookup of an organization by name, then adding quotes around the `query` string
     circumvents vector search, and prioritizes keyword matching on names (eg. `query=Candid` --> `query='Candid'`)
    * if the query is an EIN (eg. 12-3456789) then keyword searching is prioritized to get exact matches
    * adding location information such as postal codes and/or admin1 (state/province abbreviations) will filter results

    This tool should be used as a first step in any downstream task which requires identifying the nonprofit that the
    user is identifying with. Often, the `nonprofit_id` is required, and that can be found via a search.

    Parameters
    ----------
    query : str
        Free text query which drives the search functionality. This uses a hybrid approach of vector and keyword
        searching, but under certain conditions expressed in the 'guidelines' this may disable vector search.
    located_postal_code : str | None, optional
        Postal code of the organization to be searched, if provided, by default None
    located_admin1 : str | None, optional
        Admin1 code (state/province abbreviation) of the organization to be searched, if provided, by default None
    search_mode : Literal["organization_only", "organization_and_grants"] | None, optional
        Choose how to search for organizations, if `None` or "organization_and_grants" then this will examine evidence
        at the organization level as well as at the historical grant transaction level capturing activity evidence. For
        name lookups it is best to use the "organization_only" default value, by default "organization_only"

    Returns
    -------
    list[OrganizationRecord] | str
        List of the top organization search results
        If output is a string then that means there was some error, and retry should be considered
    """

    # The search is capped at the top 5 rows; location filters are only sent when supplied.
    payload: dict[str, Any] = {"query": query, "searchMode": search_mode, "rowCount": 5}
    if located_postal_code is not None:
        payload["postalCode"] = located_postal_code
    if located_admin1 is not None:
        payload["admin1"] = located_admin1

    with httpx.Client(transport=httpx.HTTPTransport(retries=3), timeout=30) as client:
        r = client.get(
            url=SEARCH.endpoint("v1/search"),
            params=payload,
            headers={**SEARCH.header} # type: ignore
        )

    # Any non-200 answer is reported back as a string so the calling agent can decide whether to retry.
    if r.status_code != 200:
        logger.error("Error calling organization search API %s. Error: %s", str(r.request.url), r.reason_phrase)
        return f"Error calling organization search. Error: {r.reason_phrase}"

    data: dict = r.json()
    return [_build_organization_record(org) for org in (data.get("returnedOrgs") or [])]


# NOTE: `@tool` exposes the docstring below to the model as the tool description, so its wording affects agent behavior.
@tool
def recommend_funders(
    nonprofit_id: int,
    subject_codes_of_program: str | None = None,
    populations_served_codes_of_program: str | None = None,
    geonameids_of_geographies_served: str | None = None,
    include_past_funders: bool = False
) -> tuple[dict[str, Any], list[dict[str, Any]]] | str:
    """Recommend potential funding organizations to a nonprofit seeking a grant.

    These recommendations are built using machine learning over a heterogeneous knowledge graph representing the work of
    the requesting organization, and the contextual recent activities of potential funders, and their grant recipients.

    While extra subject codes, populations served codes, and geography IDs for where the program takes place is not
    required, recommendations tend to improve and become more specific the more information can be provided.

    Subjects and populations can be determined using the `autocode` tool if the requester can supply a description of
    the program they are seeking funding for.

    Geographies can be determined using the geo detection tool if the requester can supply a description of the program
    they are seeking funding for.

    Key Usage Requirements:
    - Always incorporate returned profile URLs directly into the response text
    - Replace funding organization name mentions with hyperlinked Candid profile URLs
    - Prioritize creating a seamless user experience by making URLs contextually relevant
    - Use relevant recipient data as well as inferred metadata to provide explanations about recommendation relevance

    Parameters
    ----------
    nonprofit_id : int
        The unique identifier of the requesting organization. This will need to be found from a search using inputs
        elicited from the requester
    subject_codes_of_program : str | None, optional
        Subject codes from Candid's PCS taxonomy, comma separated, by default None
    populations_served_codes_of_program : str | None, optional
        Population groups served codes from Candid's PCS taxonomy, comma separated, by default None
    geonameids_of_geographies_served : str | None, optional
        Geonames ID values for geographies served by the requester's program, comma separated, by default None
    include_past_funders : bool, optional
        Boolean flag to indicate whether previous funders of the input organization identified by the `nonprofit_id`
        should be included. If the requester would like to reconsider previous funding organizations then set this to
        `True`, but the requester MUST be prompted to indicate this preference. Using the default value will help the
        requester discover new, potentially relevant funders, by default False

    Examples
    --------
    >>> recommend_funders(nonprofit_id=9981881)
    >>> recommend_funders(
        nonprofit_id=9173173,
        subject_codes_of_program='SS050000, SS000000,SB050000',
        populations_served_codes_of_program='PJ050100',
        geonameids_of_geographies_served='4094212,4094212'
    )

    Returns
    -------
    tuple[dict[str, Any], list[dict[str, Any]]] | str
        (Inferred data used to generate recommendations, array of funders being recommended)
        If output is a string then that means there was some error, and retry should be considered
    """

    payload: dict[str, Any] = {
        "candid_entity_id": nonprofit_id,
        "use_programs": True,
        "top_k": 5,
        "include_past_funders": include_past_funders
    }

    # Optional filters are only sent when they carry a value; `None` and empty strings are both left out so that an
    # empty filter is never forwarded to the API.
    optional_filters = {
        "subjects": subject_codes_of_program,
        "populations": populations_served_codes_of_program,
        "geos": geonameids_of_geographies_served
    }
    payload.update({name: value for name, value in optional_filters.items() if value})

    response = get_with_retries(
        url=FUNDER_RECOMMENDATION.endpoint("funder/pcs-v3"),
        payload=payload,
        headers={**FUNDER_RECOMMENDATION.header}
    )

    # Explicit check instead of `assert`, which is stripped under `python -O`; anything other than a response object
    # is reported through the documented "string means error" channel.
    if not isinstance(response, httpx.Response):
        logger.error(
            "Error calling funder recommendations API. Unexpected result type: %s", type(response).__name__
        )
        return "Error calling funder recommendations. Error: no response received"

    if response.status_code != 200:
        logger.error(
            "Error calling funder recommendations API %s. Error: %s",
            str(response.request.url),
            response.reason_phrase
        )
        return f"Error calling funder recommendations. Error: {response.reason_phrase}"

    data: dict = response.json()
    # Each recommendation is passed through unchanged, plus a ready-made Candid profile link for the funder.
    recommendations = [
        {**rec, "profile_link": format_candid_profile_link(rec['funder_id'])}
        for rec in (data.get("recommendations") or [])
    ]
    return data.get("meta") or {}, recommendations


# NOTE: `@tool` exposes the docstring below to the model as the tool description, so its wording affects agent behavior.
@tool
def recommend_funding_opportunities(
    nonprofit_id: int,
    subject_codes_of_program: str | None = None,
    populations_served_codes_of_program: str | None = None,
    geonameids_of_geographies_served: str | None = None
) -> tuple[dict[str, Any], list[dict[str, Any]]] | str:
    """Recommend active funding opportunities (RFPs) to a nonprofit seeking a grant.

    These recommendations are built using machine learning over a heterogeneous knowledge graph representing the work of
    the requesting organization, and the contextual recent activities of potential funders, and their grant recipients.

    While extra subject codes, populations served codes, and geography IDs for where the program takes place is not
    required, recommendations tend to improve and become more specific the more information can be provided.

    Subjects and populations can be determined using the `autocode` tool if the requester can supply a description of
    the program they are seeking funding for.

    Key Usage Requirements:
    - Always incorporate returned profile URLs directly into the response text
    - Replace funding organization name mentions with hyperlinked Candid profile URLs
    - Prioritize creating a seamless user experience by making URLs contextually relevant
    - Use inferred metadata to provide explanations about recommendation relevance

    Parameters
    ----------
    nonprofit_id : int
        The unique identifier of the requesting organization. This will need to be found from a search using inputs
        elicited from the requester
    subject_codes_of_program : str | None, optional
        Subject codes from Candid's PCS taxonomy, comma separated, by default None
    populations_served_codes_of_program : str | None, optional
        Population groups served codes from Candid's PCS taxonomy, comma separated, by default None
    geonameids_of_geographies_served : str | None, optional
        Geonames ID values for geographies served by the requester's program, comma separated, by default None

    Examples
    --------
    >>> recommend_funding_opportunities(nonprofit_id=9981881)
    >>> recommend_funding_opportunities(
        nonprofit_id=9173173,
        subject_codes_of_program='SS050000, SS000000,SB050000',
        populations_served_codes_of_program='PJ050100',
        geonameids_of_geographies_served='4094212,4094212'
    )

    Returns
    -------
    tuple[dict[str, Any], list[dict[str, Any]]] | str
        (Inferred data used to generate recommendations, array of active funding opportunities being recommended)
        If output is a string then that means there was some error, and retry should be considered
    """

    payload: dict[str, Any] = {"candid_entity_id": nonprofit_id, "use_programs": True, "top_k": 5}

    # Optional filters are only sent when they carry a value; `None` and empty strings are both left out so that an
    # empty filter is never forwarded to the API.
    optional_filters = {
        "subjects": subject_codes_of_program,
        "populations": populations_served_codes_of_program,
        "geos": geonameids_of_geographies_served
    }
    payload.update({name: value for name, value in optional_filters.items() if value})

    response = get_with_retries(
        url=FUNDER_RECOMMENDATION.endpoint("rfp/pcs-v3"),
        payload=payload,
        headers={**FUNDER_RECOMMENDATION.header}
    )

    # Explicit check instead of `assert`, which is stripped under `python -O`; anything other than a response object
    # is reported through the documented "string means error" channel.
    if not isinstance(response, httpx.Response):
        logger.error("Error calling RFP recommendation API. Unexpected result type: %s", type(response).__name__)
        return "Error calling RFP recommendations. Error: no response received"

    if response.status_code != 200:
        logger.error(
            "Error calling RFP recommendation API %s. Error: %s",
            str(response.request.url),
            response.reason_phrase
        )
        return f"Error calling RFP recommendations. Error: {response.reason_phrase}"

    data: dict = response.json()
    # Each recommendation is passed through unchanged, plus a ready-made Candid profile link for the funder.
    recommendations = [
        {**rec, "profile_link": format_candid_profile_link(rec['funder_id'])}
        for rec in (data.get("recommendations") or [])
    ]
    return data.get("meta") or {}, recommendations