brainsqueeze committed on
Commit
b917db3
·
verified ·
1 Parent(s): d0f7444

Delete ask_candid/tools/org_seach.py

Browse files
Files changed (1) hide show
  1. ask_candid/tools/org_seach.py +0 -257
ask_candid/tools/org_seach.py DELETED
@@ -1,257 +0,0 @@
1
- from typing import List, Optional, Callable, Any
2
- import logging
3
- import re
4
-
5
- from thefuzz import fuzz
6
-
7
- from langchain.output_parsers.openai_tools import JsonOutputToolsParser
8
- from langchain_core.runnables import RunnableSequence
9
- from langchain_core.prompts import ChatPromptTemplate
10
- from langchain_core.language_models.llms import LLM
11
- from langchain_core.messages import AIMessage
12
- from langgraph.constants import END
13
- from pydantic import BaseModel, Field
14
-
15
- from ask_candid.agents.schema import AgentState
16
- from ask_candid.services.org_search import OrgSearch
17
-
18
# Organization-search client, instantiated once when this module is imported and
# called by generate_org_link_dict to look up organizations by name.
search = OrgSearch()

# Set the root logging format, then expose a module-level logger that emits
# INFO-level messages and above.
logging.basicConfig(
    format="[%(levelname)s] (%(asctime)s) :: %(message)s"
)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
22
-
23
-
24
class OrganizationNames(BaseModel):
    """List of names of social-sector organizations, such as nonprofits and foundations."""

    # This model is bound to the LLM as a tool in extract_org_links_from_chatbot, which
    # reads the extracted names back from the `orgnames` argument of the tool call.
    orgnames: List[str] = Field(
        description="List of organization names",
    )
27
-
28
-
29
def extract_org_links_from_chatbot(chatbot_output: str, llm: LLM):
    """
    Extracts a list of organization names from the provided text.

    The text is sent to `llm` with the `OrganizationNames` tool bound, and the names are read
    from the `orgnames` argument of the first tool call in the parsed response.

    Args:
        chatbot_output (str): The chatbot output containing organization names and other content.
        llm (LLM): Model used for the extraction; it must provide `bind_tools`.

    Returns:
        list: Non-empty organization name strings extracted from the text. An empty list is
            returned when the model makes no tool call, or when the model call or the parsing
            of its output fails (the failure is logged, never raised).
    """
    prompt_text = """Extract only the names of officially recognized organizations, foundations, and government entities
from the text below. Do not include any entries that contain descriptions, regional identifiers, or explanations
within parentheses or following the name. Strictly exclude databases, resources, crowdfunding platforms, and general
terms. Provide the output only in the specified JSON format.

input text below:

```{chatbot_output}```

output format:
{{
'orgnames' : [list of organization names without any additional descriptions or identifiers]
}}

"""

    try:
        chain = RunnableSequence(
            ChatPromptTemplate.from_template(prompt_text),
            llm.bind_tools([OrganizationNames]),
            JsonOutputToolsParser(),
        )

        # Run the chain with the input data
        result = chain.invoke({"chatbot_output": chatbot_output})

        # No tool call in the response means the model found nothing to report
        if not result:
            logger.info("No organization names were extracted from the text")
            return []

        # Extract the organization names from the first tool call
        output_list = result[0]["args"].get("orgnames", [])

        # Validate output format
        if not isinstance(output_list, list):
            raise ValueError("Unexpected output format: 'orgnames' should be a list")

        # Drop blanks and non-string entries so callers only ever see usable names
        return [name for name in output_list if isinstance(name, str) and name.strip()]

    except Exception as e:
        # Extraction is best-effort: a model or parsing failure must not break the chat flow
        logger.warning("Organization name extraction failed: %s", e)
        return []
80
-
81
-
82
def is_similar(name: str, list_of_dict: list, threshold: int = 80):
    """
    Returns True if `name` is similar to any names in `list_of_dict` based on a similarity threshold.

    Comparison is case-insensitive and uses `fuzz.ratio`.

    Args:
        name (str): The name to look for.
        list_of_dict (list): Dictionaries that are each expected to carry a string under the
            `"name"` key. Items without that key, or whose value is not a string, are skipped.
        threshold (int): Minimum `fuzz.ratio` score that counts as a match.

    Returns:
        bool: True as soon as one item reaches `threshold`; False when none does, when `name`
            is not a string, when `list_of_dict` is empty or None, or when the input cannot be
            processed because of a type error.
    """
    # Nothing to compare: a non-string name can never match, and there may be no candidates
    if not isinstance(name, str) or not list_of_dict:
        return False

    # Lower-case the query once instead of on every iteration
    target = name.lower()

    try:
        for item in list_of_dict:
            try:
                candidate = item["name"].lower()
            except KeyError:
                # Handle cases where 'name' key might be missing in dictionary
                logger.warning("Missing 'name' key in dictionary item %s", item)
                continue
            except AttributeError:
                # Handle non-string name values in dictionary items
                logger.warning("Non-string 'name' in dictionary item %s", item)
                continue

            if fuzz.ratio(target, candidate) >= threshold:
                return True
    except TypeError as e:
        # Handle cases where input types are incorrect (e.g. items that are not mappings)
        logger.warning("Could not compare organization names: %s", e)
        return False

    return False
107
-
108
-
109
def generate_org_link_dict(org_names_list: list):
    """
    Maps organization names to their Candid profile URLs if available.

    For each organization in `org_names_list`, this function looks the name up with the
    module-level `search` client and inspects only the first hit. If that hit's names are
    similar to the requested name (see `is_similar`) and it carries a Candid entity ID, a
    profile URL is built from the ID. If there is no hit, no similar name, no ID, or the
    lookup raises, the organization is mapped to an empty string.

    Args:
        org_names_list (list): List of organization names (str) to retrieve Candid profile links for.

    Returns:
        dict: Dictionary with organization names as keys and Candid profile URLs or empty strings as values.

    Example:
        generate_org_link_dict(['New York-Presbyterian Hospital'])
        # {'New York-Presbyterian Hospital': 'https://app.candid.org/profile/6915255'}
    """
    link_dict = {}

    for org in org_names_list or []:
        # Default to "no link"; only a confirmed match below overwrites it
        link_dict[org] = ""

        try:
            # Attempt to retrieve organization data
            response = search(org, name_only=True)
            if not response:
                continue

            # Only the top hit is considered, and only if its names resemble the query
            # NOTE(review): assumes each hit is dict-like with a "names" list of
            # {"name": ...} items - confirm against OrgSearch
            top_hit = response[0]
            if not is_similar(org, top_hit.get("names", [])):
                continue

            candid_entity_id = top_hit.get("candid_entity_id")
            if candid_entity_id:
                link_dict[org] = f"https://app.candid.org/profile/{candid_entity_id}"

        except Exception as e:
            # A failed lookup for one organization must not prevent linking the others
            logger.warning("Candid profile lookup failed for organization '%s': %s", org, e)

    return link_dict
159
-
160
-
161
def embed_org_links_in_text(input_text: str, org_link_dict: dict):
    """
    Replaces organization names in `input_text` with links from `org_link_dict` and appends a
    Candid info message.

    All names are substituted in a single pass, longest name first, so that a name which is
    contained in another name (or in the markup of an inserted link) never rewrites a link
    that has already been produced. Entries with an empty name or an empty URL are ignored.

    Args:
        input_text (str): The text containing organization names.
        org_link_dict (dict): Mapping of organization names to URLs.

    Returns:
        str: Updated text with linked organization names and an appended Candid message. If
            the arguments have unusable types, the problem is logged and `input_text` is
            returned unchanged.
    """
    candid_message = (
        "<p class='candid-app-link'> "
        "Visit <a href=https://app.candid.org/ target='_blank' rel='noreferrer' class='candid-org-link'>Candid</a> "
        "to get nonprofit information you need.</p>"
    )

    try:
        # Only names that actually have a URL are linked; an empty name would match everywhere
        linked = {name: url for name, url in org_link_dict.items() if name and url}

        if linked:
            # Longest alternatives first so "Ford Foundation" wins over "Ford" at the same position
            pattern = re.compile(
                "|".join(re.escape(name) for name in sorted(linked, key=len, reverse=True))
            )

            def _to_anchor(match):
                # A callable replacement is inserted literally, so backslashes in the
                # organization name or URL are not treated as regex escapes
                org_name = match.group(0)
                return (
                    f"<a href={linked[org_name]} target='_blank' rel='noreferrer' "
                    f"class='candid-org-link'>{org_name}</a>"
                )

            input_text = pattern.sub(_to_anchor, input_text)

        # Append Candid information message at the end
        return input_text + candid_message

    except (AttributeError, TypeError, re.error) as e:
        logger.warning("Could not embed organization links: %s", e)
        return input_text
201
-
202
-
203
def has_org_name(
    state: AgentState,
    llm: LLM,
    user_callback: Optional[Callable[[str], Any]] = None
) -> AgentState:
    """Looks for organization names in the latest message and picks the next graph step.

    Parameters
    ----------
    state : AgentState
        Current agent state; its ``"messages"`` list must be non-empty because the
        last entry's ``content`` is the text that gets scanned.
    llm : LLM
        Model handed to ``extract_org_links_from_chatbot`` for the name extraction.
    user_callback : Optional[Callable[[str], Any]], optional
        Optional UI hook that receives a short progress message. A failure inside
        the hook is logged and otherwise ignored, by default None

    Returns
    -------
    AgentState
        ``{"next": "insert_org_link", "org_dict": ...}`` when at least one organization
        name was extracted, otherwise ``{"next": END, "messages": ...}`` carrying the
        unchanged message list.
    """

    logger.info("---HAS ORG NAMES?---")
    if user_callback is not None:
        try:
            user_callback("Checking for relevant organizations")
        except Exception as ex:
            logger.warning("User callback was passed in but failed: %s", ex)

    history = state["messages"]
    latest_text = history[-1].content
    org_names = extract_org_links_from_chatbot(latest_text, llm=llm)

    # Profile lookups are skipped entirely when nothing was extracted
    if org_names:
        org_links = generate_org_link_dict(org_names)
        if org_links:
            logger.info("---FOUND ORG NAMES---")
            return {"next": "insert_org_link", "org_dict": org_links}

    logger.info("---NO ORG NAMES FOUND---")
    return {"next": END, "messages": history}
239
-
240
-
241
def insert_org_link(state: AgentState) -> AgentState:
    """
    Rewrites the latest message so that organization names become links.

    The last entry of `state["messages"]` is removed from that list in place, its content is
    passed through `embed_org_links_in_text` together with `state["org_dict"]`, and the
    result is returned wrapped in a new AIMessage.

    Args:
        state (dict): The current state; reads the `"messages"` list and the `"org_dict"` mapping.

    Returns:
        dict: A dictionary whose `"messages"` entry is a one-element list holding the
            rewritten AIMessage.
    """
    logger.info("---INSERT ORG LINKS---")
    history = state["messages"]
    original_text = history[-1].content

    # The unlinked message is dropped because the linked version returned below replaces it
    history.pop()

    linked_text = embed_org_links_in_text(original_text, state["org_dict"])
    return {"messages": [AIMessage(content=linked_text)]}