Spaces:
Running
Running
| from typing import Any | |
| from pydantic import BaseModel, Field | |
| from langchain_core.output_parsers.pydantic import PydanticOutputParser | |
| from langchain_core.language_models.chat_models import BaseChatModel | |
| from langchain_core.runnables import RunnableSequence | |
| from langchain_core.prompts import PromptTemplate | |
| from langchain_core.tools import tool, BaseTool | |
| from thefuzz import fuzz | |
| from ask_candid.tools.utils import format_candid_profile_link | |
| from ask_candid.base.api_base import BaseAPI | |
| from ask_candid.base.config.rest import CANDID_SEARCH_API | |
# Structured-output schema: `OrganizationIdentifier.parser` is built with this model as its
# `pydantic_object`, so the chat model's extraction result is parsed into an instance of it.
# NOTE: the class docstring and the field description are kept verbatim because pydantic
# presumably exposes them through the generated schema — confirm before rewording.
class OrganizationNames(BaseModel):
    """List of names of social-sector organizations, such as nonprofits and foundations."""

    # Required field (`...` means there is no default); the extracted organization names.
    orgnames: list[str] = Field(..., description="List of organization names.")
# Argument schema for the `OrganizationIdentifier` tool (assigned to its `args_schema`).
# The single `text` argument is what the tool's `_run` / `_arun` methods receive.
class OrganizationIdentifierArgs(BaseModel):
    """Input arguments for the organization identifier tool"""

    # Required field (`...` means there is no default); free text to scan for organization names.
    text: str = Field(..., description="Chat model response text which contains named organizations.")
class OrganizationIdentifier(BaseTool):
    # Tool which asks a chat model to extract organization names from a piece of text and
    # returns them as a plain list of strings.

    # Chat model used to perform the extraction; has no default and must be supplied.
    llm: BaseChatModel
    # Parses the raw model output into an `OrganizationNames` instance.
    parser: PydanticOutputParser = PydanticOutputParser(pydantic_object=OrganizationNames)
    # Prompt text; `{chatbot_output}` and `{format_instructions}` are filled by `_build_pipeline`.
    template: str = """Extract only the names of officially recognized organizations, foundations, and government
entities from the text below. Do not include any entries that contain descriptions, regional identifiers, or
explanations within parentheses or following the name. Strictly exclude databases, resources, crowdfunding
platforms, and general terms. Provide the output only in the specified JSON format.
input text: ```{chatbot_output}```
output format: ```{format_instructions}```
"""
    name: str = "organization_identifier"
    description: str = """
Identify the names of nonprofits and foundations from chat model responses. If it is likely that a response contains
proper names then it should be processed through this tool.
Some tools have outputs with organizations already identified by Candid's data. These include:
* grant search
* organization search
* funder recommendations
* RFP recommendations
If these tools are invoked then use the IDs and profile URLs provided in their outputs, and DO NOT use this tool.
Examples
--------
>>> `organization_identifier('My Favorite Foundation awarded a grant to My Favorite Nonprofit.')`
>>> `organization_identifier('The LoremIpsum Nonprofit will be running a community event this Thursday')`
"""
    args_schema: type[OrganizationIdentifierArgs] = OrganizationIdentifierArgs

    def _build_pipeline(self):
        """Assemble the prompt -> chat model -> parser sequence used for extraction.

        The parser's format instructions are bound into the prompt up front, leaving
        ``chatbot_output`` as the only variable to provide at invocation time.
        """
        format_instructions = self.parser.get_format_instructions()
        extraction_prompt = PromptTemplate(
            template=self.template,
            input_variables=["chatbot_output"],
            partial_variables={"format_instructions": format_instructions},
        )
        return RunnableSequence(extraction_prompt, self.llm, self.parser)

    def _run(self, text: str) -> list[str]:
        """Extract organization names from ``text`` synchronously."""
        extracted: OrganizationNames = self._build_pipeline().invoke({"chatbot_output": text})
        return extracted.orgnames

    async def _arun(self, text: str) -> list[str]:
        """Extract organization names from ``text`` asynchronously."""
        pipeline = self._build_pipeline()
        extracted: OrganizationNames = await pipeline.ainvoke({"chatbot_output": text})
        return extracted.orgnames
def name_search(name: str) -> list[dict[str, Any]]:
    """Look up an organization name through the Candid Search API.

    Issues a GET against the ``/v1/search`` endpoint in ``organization_only`` search mode,
    requesting 5 rows. The name is wrapped in single quotes before being sent
    (presumably to request a phrase-style match — confirm against the API documentation).

    Parameters
    ----------
    name : str
        Organization name to search for.

    Returns
    -------
    list[dict[str, Any]]
        The value under ``returnedOrgs`` in the response, or an empty list when that key is
        missing or holds a falsy value.
    """
    endpoint = f'{CANDID_SEARCH_API["url"]}/v1/search'
    auth_headers = {"x-api-key": CANDID_SEARCH_API["key"]}
    client = BaseAPI(url=endpoint, headers=auth_headers)

    payload = client.get(query=f"'{name}'", searchMode="organization_only", rowCount=5)
    orgs = payload.get("returnedOrgs")
    return orgs if orgs else []
def find_similar(name: str, potential_matches: list[dict[str, Any]], threshold: int = 80):
    """Yield the candidate organizations whose names are sufficiently similar to ``name``.

    Each candidate is scored with ``fuzz.ratio`` on lower-cased strings, taking the best
    score across its ``orgName``, ``akaName`` and ``dbaName`` fields. Fields which are
    missing, ``None`` or empty are ignored rather than raising or being compared.

    Parameters
    ----------
    name : str
        Organization name to look for. A blank name yields nothing.
    potential_matches : list[dict[str, Any]]
        Candidate organization records, e.g. the output of ``name_search``.
    threshold : int, optional
        Minimum similarity score (inclusive) a candidate needs to be yielded, by default 80

    Yields
    ------
    tuple[dict[str, Any], Any]
        ``(org, similarity)`` for every candidate scoring at or above ``threshold``, in the
        order the candidates were given. ``similarity`` is the best ``fuzz.ratio`` score.
    """
    # Lower-case the query once instead of once per compared field.
    query = name.lower()
    if not query.strip():
        # A blank query cannot identify an organization. NOTE(review): depending on the
        # fuzzy-matching backend, two empty strings may score as a perfect match, which
        # would make every record with a null alias "match" — confirm and keep this guard.
        return

    for org in potential_matches:
        # `.get` tolerates records which omit one of the name fields entirely.
        candidates = (org.get(field) for field in ("orgName", "akaName", "dbaName"))
        scores = [fuzz.ratio(query, candidate.lower()) for candidate in candidates if candidate]
        # A record with no usable name scores 0, the same as comparing against "".
        similarity = max(scores, default=0)
        if similarity >= threshold:
            yield org, similarity
def find_mentioned_organizations(organizations: list[str]) -> tuple[str, dict[str, str]]:
    """Match organization names found in a chat response to official organizations tracked by Candid. This involves
    using the Candid Search API in a lookup mode, and then finding the best result(s) using a heuristic string
    similarity search.

    This tool is focused on getting links to the organization's Candid profile for the user to click and explore in
    more detail.

    Use the URLs here to replace organization names in the chat response with links to the organization's profile. Links
    to Candid profiles **MUST** be used to do the following:
    1. Generate direct links to Candid organization profiles
    2. Provide a mechanism for users to easily access detailed organizational information
    3. Enhance responses with authoritative source links

    Key Usage Requirements:
    - Always incorporate returned profile URLs directly into the response text
    - Replace organization name mentions with hyperlinked Candid profile URLs
    - Prioritize creating a seamless user experience by making URLs contextually relevant

    Example Desired Output:
    Instead of: 'The Gates Foundation does impressive work.'
    Use: 'The [Gates Foundation](https://app.candid.org/profile/XXXXX) does impressive work.'

    The function returns a tuple with:
    - A link information text (optional)
    - A dictionary mapping input names to their best Candid Search profile URL

    Failure to integrate the URLs into the response is considered an incomplete implementation.

    Examples
    --------
    >>> find_mentioned_organizations(organizations=['Gates Foundation', 'Candid'])

    Parameters
    ----------
    organizations : list[str]
        A list of organization name strings found in a chat response message which need to be matched.
        Blank names are skipped and repeated names are only looked up once.

    Returns
    -------
    tuple[str, dict[str, str]]
        (Link information text, mapping input name --> Candid Search profile URL of the best potential match).
        Names without a sufficiently similar match are left out of the mapping.
    """
    # NOTE(review): the docstring above reads like instructions consumed by a chat model
    # (e.g. as a tool description) — confirm before shortening or rewording it.
    output = {}
    # `dict.fromkeys` drops repeated names (first-seen order kept) so each is searched once.
    for name in dict.fromkeys(organizations):
        if not name or not name.strip():
            # A blank name cannot be matched; skip the API round trip.
            continue

        search_results = name_search(name)
        # `default=None` covers the "no similar organization" case explicitly, so an
        # unrelated ValueError raised while scoring is no longer silently swallowed.
        best = max(
            find_similar(name=name, potential_matches=search_results),
            key=lambda match: match[-1],
            default=None,
        )
        if best is None:
            # no similar organizations could be found for this one, keep going
            continue

        best_result, _ = best
        output[name] = format_candid_profile_link(best_result["candidEntityID"])

    response = [f"The Candid profile link for {name} is {url}" for name, url in output.items()]
    return '. '.join(response), output
def find_mentioned_organizations_detailed(organizations: list[str]) -> dict[str, dict[str, Any]]:
    """Match organization names found in a chat response to official organizations tracked by Candid. This involves
    using the Candid Search API in a lookup mode, and then finding the best result(s) using a heuristic string
    similarity search.

    Unlike the link-oriented lookup, the full search record of the best match is returned for each name.

    Examples
    --------
    >>> find_mentioned_organizations_detailed(organizations=['Gates Foundation', 'Candid'])

    Parameters
    ----------
    organizations : list[str]
        A list of organization name strings found in a chat response message which need to be matched.
        Blank names are skipped and repeated names are only looked up once.

    Returns
    -------
    dict[str, dict[str, Any]]
        Mapping from the input name(s) to the best potential match. Names without a sufficiently
        similar match are left out.
    """
    output = {}
    # `dict.fromkeys` drops repeated names (first-seen order kept) so each is searched once.
    for name in dict.fromkeys(organizations):
        if not name or not name.strip():
            # A blank name cannot be matched; skip the API round trip.
            continue

        search_results = name_search(name)
        # `default=None` covers the "no similar organization" case explicitly, so an
        # unrelated ValueError raised while scoring is no longer silently swallowed.
        best = max(
            find_similar(name=name, potential_matches=search_results),
            key=lambda match: match[-1],
            default=None,
        )
        if best is None:
            # no similar organizations could be found for this one, keep going
            continue

        best_result, _ = best
        output[name] = best_result
    return output