Spaces:

CandidAI
/

ask-candid

Running

App Files Files Community

brainsqueeze committed on Dec 12, 2024

Commit

34b2d9c

verified ·

1 Parent(s): 8a6abdf

Delete tools

Browse files

Files changed (4) hide show

tools/__init__.py +0 -0
tools/config.py +0 -5
tools/org_seach.py +0 -196
tools/question_reformulation.py +0 -44

tools/__init__.py DELETED Viewed

File without changes

tools/config.py DELETED Viewed

@@ -1,5 +0,0 @@
-import os
-# Connection settings for the CDS API, read from the environment at import time.
-# A variable that is not set is stored as None (os.getenv's default).
-CDS_API = {name: os.getenv(name) for name in ('CDS_API_URL', 'CDS_API_KEY')}

tools/org_seach.py DELETED Viewed

@@ -1,196 +0,0 @@
-from typing import List
-import re
-from fuzzywuzzy import fuzz
-from langchain.output_parsers.openai_tools import JsonOutputToolsParser
-from langchain_openai.chat_models import ChatOpenAI
-from langchain_core.runnables import RunnableSequence
-from langchain_core.prompts import ChatPromptTemplate
-from pydantic import BaseModel, Field
-# Import OrgSearch by its absolute path first; if that raises ImportError,
-# fall back to the package-relative path.
-try:
-    from common.org_search_component import OrgSearch
-except ImportError:
-    from ...common.org_search_component import OrgSearch
-# Module-level search client, created once at import time and called by
-# generate_org_link_dict to look up organizations by name.
-search = OrgSearch()
-# Schema bound to the chat model as a tool in extract_org_links_from_chatbot.
-# NOTE(review): the docstring and Field description below are presumably sent to
-# the model as the tool/field descriptions, so treat them as prompt text - confirm
-# before rewording.
-class OrganizationNames(BaseModel):
-    """List of names of social-sector organizations, such as nonprofits and foundations."""
-    orgnames: List[str] = Field(description="List of organization names")
-def extract_org_links_from_chatbot(chatbot_output: str):
-    """
-    Extracts a list of organization names from the provided text.
-    The text is inserted into an extraction prompt and sent to a chat model bound to the
-    `OrganizationNames` tool; the `orgnames` argument of the first tool call is returned.
-    Args:
-        chatbot_output (str): The chatbot output containing organization names and other content.
-    Returns:
-        list: A list of organization names extracted from the text. An empty list is returned
-            when the input is blank, when the model makes no tool call, or when extraction
-            fails for any reason. Failures are printed, never raised.
-    """
-    # Blank text cannot contain organization names, so skip the model call entirely.
-    if isinstance(chatbot_output, str) and not chatbot_output.strip():
-        return []
-    prompt = """Extract only the names of officially recognized organizations, foundations, and government entities
-    from the text below. Do not include any entries that contain descriptions, regional identifiers, or explanations
-    within parentheses or following the name. Strictly exclude databases, resources, crowdfunding platforms, and general
-    terms. Provide the output only in the specified JSON format.
-    input text below:
-        ```{chatbot_output}```
-    output format:
-    {{
-    'orgnames' : [list of organization names without any additional descriptions or identifiers]
-    }}
-    """
-    try:
-        parser = JsonOutputToolsParser()
-        llm = ChatOpenAI(model="gpt-4o").bind_tools([OrganizationNames])
-        # Keep the raw template string and the compiled template under separate names
-        prompt_template = ChatPromptTemplate.from_template(prompt)
-        chain = RunnableSequence(prompt_template, llm, parser)
-        # Run the chain with the input data
-        result = chain.invoke({"chatbot_output": chatbot_output})
-        # An empty parser result means the model made no tool call: nothing was found
-        if not result:
-            return []
-        # Extract the organization names from the first tool call
-        output_list = result[0]["args"].get("orgnames", [])
-        # Validate output format; the ValueError is handled by the except below
-        if not isinstance(output_list, list):
-            raise ValueError("Unexpected output format: 'orgnames' should be a list")
-        return output_list
-    except Exception as e:
-        # Best-effort extraction: report the problem and fall back to "no organizations"
-        print(f"text does not have any organization: {e}")
-        return []
-def is_similar(name: str, list_of_dict: list, threshold: int = 80):
-    """
-    Reports whether `name` fuzzily matches the "name" value of any entry in `list_of_dict`.
-    Both sides are lower-cased and scored with `fuzz.ratio`; a score at or above `threshold`
-    counts as a match. Entries that raise KeyError (no "name" key) or AttributeError (a value
-    without `.lower()`) are reported and skipped. A TypeError anywhere in the scan is reported
-    and ends it with False.
-    """
-    try:
-        for candidate in list_of_dict:
-            try:
-                score = fuzz.ratio(name.lower(), candidate["name"].lower())
-            except KeyError:
-                # Entry has no 'name' key: report it and move on
-                print(f"KeyError: Missing 'name' key in dictionary item {candidate}")
-                continue
-            except AttributeError:
-                # A value could not be lower-cased: report it and move on
-                print(f"AttributeError: Non-string 'name' in dictionary item {candidate}")
-                continue
-            if score >= threshold:
-                return True
-        return False
-    except TypeError as e:
-        # Wrong input types (e.g. a non-iterable list_of_dict) end the scan
-        print(f"TypeError: {e}")
-        return False
-def generate_org_link_dict(org_names_list: list):
-    """
-    Maps organization names to their Candid profile URLs if available.
-    For each organization in `org_names_list`, this function queries the module-level `search`
-    client by name. If the first result has a name similar to the organization (see `is_similar`)
-    and carries a Candid entity ID, a profile URL is built from that ID. If there is no result,
-    no similar name, no ID, or an error occurs, the organization maps to an empty string.
-    Each distinct name is searched only once, even if it appears several times in the input.
-    Args:
-        org_names_list (list): List of organization names (str) to retrieve Candid profile links for.
-    Returns:
-        dict: Dictionary with organization names as keys and Candid profile URLs or empty strings as values.
-    Example:
-        generate_org_link_dict(['New York-Presbyterian Hospital'])
-        # {'New York-Presbyterian Hospital': 'https://app.candid.org/profile/6915255'}
-    """
-    link_dict = {}
-    for org in org_names_list:
-        if org in link_dict:
-            # Already resolved: a repeated name would only repeat the same search
-            continue
-        # Default to "no link"; overwritten below only when a usable match is found
-        link_dict[org] = ""
-        try:
-            # Attempt to retrieve organization data
-            response = search(org, name_only=True)
-            if not response:
-                continue
-            top_hit = response[0]
-            # Only trust the first result if one of its names resembles the requested one
-            if not is_similar(org, top_hit.get("names", "")):
-                continue
-            candid_entity_id = top_hit.get("candid_entity_id")
-            if candid_entity_id:
-                link_dict[org] = f"https://app.candid.org/profile/{candid_entity_id}"
-        except KeyError as e:
-            # Handle missing keys in the response dictionary
-            print(f"KeyError encountered for organization '{org}': {e}")
-        except Exception as e:
-            # Best-effort lookup: any other failure leaves the empty-string default in place
-            print(f"An error occurred for organization '{org}': {e}")
-    return link_dict
-def embed_org_links_in_text(input_text: str, org_link_dict: dict):
-    """
-    Replaces organization names in `input_text` with links from `org_link_dict` and appends a Candid info message.
-    All names are replaced in a single pass, longest name first, so a name that is a substring of
-    another name (or of the inserted anchor markup) is never linked inside an existing link.
-    Names are matched literally and case-sensitively; entries with an empty name or empty URL are ignored.
-    Args:
-        input_text (str): The text containing organization names.
-        org_link_dict (dict): Mapping of organization names to URLs.
-    Returns:
-        str: Updated text with linked organization names and an appended Candid message. If an
-            error occurs, the error is printed and the text is returned as it stood at that point.
-    """
-    try:
-        # Keep only entries that can actually produce a link
-        linked = {name: url for name, url in org_link_dict.items() if name and url}
-        if linked:
-            # Longest names first so the regex alternation prefers "Ford Foundation" over "Ford"
-            names_by_length = sorted(linked, key=len, reverse=True)
-            pattern = re.compile("|".join(re.escape(name) for name in names_by_length))
-            def _to_anchor(match):
-                # A function replacement is inserted verbatim, so backslashes in names or URLs
-                # are not interpreted as regex escapes or group references
-                org_name = match.group(0)
-                return (
-                    f"<a href={linked[org_name]} target='_blank' rel='noreferrer' "
-                    f"class='candid-org-link'>{org_name}</a>"
-                )
-            input_text = pattern.sub(_to_anchor, input_text)
-        # Append Candid information message at the end
-        input_text += (
-            "<p class='candid-app-link'> "
-            "Visit <a href=https://app.candid.org/ target='_blank' rel='noreferrer' class='candid-org-link'>Candid</a> "
-            "to get nonprofit information you need.</p>"
-        )
-    except TypeError as e:
-        print(f"TypeError encountered: {e}")
-        return input_text
-    except Exception as e:
-        print(f"Unexpected error: {e}")
-        return input_text
-    return input_text

tools/question_reformulation.py DELETED Viewed

@@ -1,44 +0,0 @@
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.output_parsers import StrOutputParser
-def reformulate_question_using_history(state, llm):
-    """
-    Transform the query to produce a better query with details from previous messages.
-    With a single message in the state the question is returned unchanged and the LLM is not
-    called. Otherwise the LLM is asked to rewrite the latest input as a standalone question.
-    Args:
-        state (messages): The current state; `state["messages"]` is a non-empty sequence whose
-            last item exposes the latest user input as `.content`
-        llm: LLM to use
-    Returns:
-        dict: The updated state with re-phrased question and original user_input for UI
-    """
-    print("---REFORMULATE THE USER INPUT---")
-    messages = state["messages"]
-    question = messages[-1].content
-    if len(messages) > 1:
-        contextualize_q_system_prompt = """Given a chat history and the latest user input \
-        which might reference context in the chat history, formulate a standalone input \
-        which can be understood without the chat history.
-        Chat history:
-        \n ------- \n
-        {chat_history}
-        \n ------- \n
-        User input:
-        \n ------- \n
-        {question}
-        \n ------- \n
-        Do NOT answer the question, \
-        just reformulate it if needed and otherwise return it as is.
-        """
-        contextualize_q_prompt = ChatPromptTemplate([
-            ("system", contextualize_q_system_prompt),
-            # Use a placeholder rather than the raw question: message strings are parsed as
-            # templates, so braces typed by the user would otherwise be read as variables
-            ("human", "{question}"),
-        ])
-        rag_chain = contextualize_q_prompt | llm | StrOutputParser()
-        new_question = rag_chain.invoke({"chat_history": messages, "question": question})
-        print(f"user asked: '{question}', agent reformulated the question basing on the chat history: {new_question}")
-        return {"messages": [new_question], "user_input" : question}
-    return {"messages": [question], "user_input" : question}