Spaces:

CandidAI
/

ask-candid

Running

App Files Files Community

brainsqueeze committed on Sep 29

Commit

d916808

verified ·

1 Parent(s): fc7f1bb

Delete ask_candid/tools/elastic

Browse files

Files changed (5) hide show

ask_candid/tools/elastic/__init__.py +0 -0
ask_candid/tools/elastic/index_data_tool.py +0 -59
ask_candid/tools/elastic/index_details_tool.py +0 -75
ask_candid/tools/elastic/index_search_tool.py +0 -131
ask_candid/tools/elastic/list_indices_tool.py +0 -59

ask_candid/tools/elastic/__init__.py DELETED Viewed

File without changes

ask_candid/tools/elastic/index_data_tool.py DELETED Viewed

@@ -1,59 +0,0 @@
-from typing import Type, Optional
-import logging
-from pydantic import BaseModel, Field
-from elasticsearch import Elasticsearch
-from langchain.callbacks.manager import CallbackManagerForToolRun
-from langchain.tools.base import BaseTool
-from ask_candid.base.config.connections import SEMANTIC_ELASTIC_QA
-# Configure the root logger at import time and create this module's logger.
-logging.basicConfig(level="INFO")
-logger = logging.getLogger("elasticsearch_playground")
-# Module-level Elasticsearch client shared by the tool below. Connection
-# settings (cloud id and API key) come from SEMANTIC_ELASTIC_QA; TLS
-# certificates are verified.
-es = Elasticsearch(
-    cloud_id=SEMANTIC_ELASTIC_QA.cloud_id,
-    api_key=SEMANTIC_ELASTIC_QA.api_key,
-    verify_certs=True,
-    request_timeout=60 * 3  # 180 -- presumably seconds (3 minutes); confirm against client docs
-)
-class IndexShowDataInput(BaseModel):
-    """Input for the index show data tool."""
-    # Required field (``...`` means no default): the index whose documents
-    # are sampled by IndexShowDataTool.
-    index_name: str = Field(
-        ..., description="The name of the index for which the data is to be retrieved"
-    )
-class IndexShowDataTool(BaseTool):
-    """Tool for getting a list of entries from an ElasticSearch index, helpful to figure out what data is available."""
-    # Tool identifier and the description presented alongside it.
-    name: str = "elastic_index_show_data"
-    description: str = (
-        "Input is an index name, output is a JSON based string with an extract of the data of the index"
-    )
-    # Schema describing the arguments accepted by ``_run``.
-    args_schema: Optional[Type[BaseModel]] = IndexShowDataInput
-    def _run(
-        self,
-        index_name: str,
-        run_manager: Optional[CallbackManagerForToolRun] = None,
-    ) -> str:
-        """Return a sample of up to 20 documents from ``index_name``.
-
-        Args:
-            index_name: Name of the index to sample.
-            run_manager: Optional callback manager; not used by this tool.
-
-        Returns:
-            ``str()`` of the ``hits`` section of a ``match_all`` search, or an
-            empty string when the request fails (the failure is logged).
-        """
-        try:
-            res = es.search(
-                index=index_name,
-                from_=0,
-                size=20,
-                query={"match_all": {}},
-            )
-            return str(res["hits"])
-        except Exception:
-            # Best-effort tool: a failure is reported to the caller as an empty
-            # result. logger.exception already records the message and the
-            # traceback, so no separate print() is needed.
-            logger.exception("Could not fetch index data for %s", index_name)
-            return ""

ask_candid/tools/elastic/index_details_tool.py DELETED Viewed

@@ -1,75 +0,0 @@
-from typing import Type, Optional
-import logging
-from pydantic import BaseModel, Field
-from elasticsearch import Elasticsearch
-from langchain.callbacks.manager import (
-    AsyncCallbackManagerForToolRun,
-    CallbackManagerForToolRun,
-)
-from langchain.tools.base import BaseTool
-from ask_candid.base.config.connections import SEMANTIC_ELASTIC_QA
-# Configure the root logger at import time and create this module's logger.
-logging.basicConfig(level="INFO")
-logger = logging.getLogger("elasticsearch_playground")
-# Module-level Elasticsearch client shared by the tool below. Connection
-# settings (cloud id and API key) come from SEMANTIC_ELASTIC_QA; TLS
-# certificates are verified.
-es = Elasticsearch(
-    cloud_id=SEMANTIC_ELASTIC_QA.cloud_id,
-    api_key=SEMANTIC_ELASTIC_QA.api_key,
-    verify_certs=True,
-    request_timeout=60 * 3,  # 180 -- presumably seconds (3 minutes); confirm against client docs
-)
-class IndexDetailsInput(BaseModel):
-    """Input for the list index details tool."""
-    # Required field (``...`` means no default): the index whose aliases,
-    # field mappings and settings are looked up by IndexDetailsTool.
-    index_name: str = Field(
-        ...,
-        description="The name of the index for which the details are to be retrieved",
-    )
-class IndexDetailsTool(BaseTool):
-    """Tool for getting information about a single ElasticSearch index."""
-    name: str = "elastic_index_show_details"  # tool identifier
-    description: str = (
-        "Input is an index name, output is a JSON-based string with the aliases, mappings containing the field names, and settings of an index."
-    )
-    args_schema: Optional[Type[BaseModel]] = (
-        IndexDetailsInput  # schema describing the arguments accepted by _run
-    )
-    def _run(
-        self,
-        index_name: str,
-        run_manager: Optional[CallbackManagerForToolRun] = None,
-    ) -> str:
-        """Return the aliases, field mappings and settings of one index.
-
-        Args:
-            index_name: Name of the index to describe.
-            run_manager: Optional callback manager; not used by this tool.
-
-        Returns:
-            ``str()`` of a dict with the keys "alias", "field_mappings" and
-            "settings", or an empty string when any lookup fails (the failure
-            is logged).
-        """
-        try:
-            # Three separate metadata requests against the module-level client.
-            alias = es.indices.get_alias(index=index_name)
-            field_mappings = es.indices.get_field_mapping(index=index_name, fields="*")
-            field_settings = es.indices.get_settings(index=index_name)
-            # Each response is indexed by ``index_name``; a response that lacks
-            # that key raises KeyError, which is handled below like any other
-            # failure.
-            return str(
-                {
-                    "alias": alias[index_name],
-                    "field_mappings": field_mappings[index_name],
-                    "settings": field_settings[index_name],
-                }
-            )
-        except Exception as e:
-            # Best-effort tool: log with traceback and return an empty result.
-            logger.exception(
-                "Could not fetch index information for %s: %s", index_name, e
-            )
-            return ""
-    async def _arun(
-        self,
-        index_name: str = "",
-        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
-    ) -> str:
-        """Async entry point; always raises NotImplementedError."""
-        raise NotImplementedError("IndexDetailsTool does not support async operations")

ask_candid/tools/elastic/index_search_tool.py DELETED Viewed

@@ -1,131 +0,0 @@
-import logging
-import json
-import tiktoken
-from elasticsearch import Elasticsearch
-# from pydantic.v1 import BaseModel, Field  # <-- Uses v1 namespace
-from pydantic import BaseModel, Field
-from langchain.tools import StructuredTool
-from ask_candid.base.config.connections import SEMANTIC_ELASTIC_QA
-# Configure the root logger at import time and create this module's logger.
-logging.basicConfig(level="INFO")
-logger = logging.getLogger("elasticsearch_playground")
-# Module-level Elasticsearch client used by elastic_search below. Connection
-# settings (cloud id and API key) come from SEMANTIC_ELASTIC_QA; TLS
-# certificates are verified.
-es = Elasticsearch(
-    cloud_id=SEMANTIC_ELASTIC_QA.cloud_id,
-    api_key=SEMANTIC_ELASTIC_QA.api_key,
-    verify_certs=True,
-    request_timeout=60 * 3,  # 180 -- presumably seconds (3 minutes); confirm against client docs
-)
-class SearchToolInput(BaseModel):
-    """Input for the index search tool."""
-    # All four fields are required (``...`` means no default).
-    index_name: str = Field(
-        ..., description="The name of the index for which the data is to be retrieved"
-    )
-    # JSON text, not a dict: elastic_search parses it with json.loads.
-    query: str = Field(
-        ...,
-        description="The ElasticSearch JSON query used to filter all hits. Should use the _source field if possible to specify required fields.",
-    )
-    # Offset of the first record to return.
-    from_: int = Field(
-        ..., description="The record index from which the query will start"
-    )
-    # Requested number of records.
-    size: int = Field(
-        ...,
-        description="How many records will be retrieved from the ElasticSearch query",
-    )
-def elastic_search(
-    pcs_codes: dict,
-    index_name: str,
-    query: str,
-    from_: int = 0,
-    size: int = 20,
-) -> str:
-    """Execute a JSON query on an ElasticSearch index and return hits and/or aggregations as a string.
-
-    Args:
-        pcs_codes: Not used by this function; kept so existing callers keep working.
-        index_name: Index to search.
-        query: JSON text. It may hold top-level "query", "aggs" and "sort"
-            keys; when none of them is present the whole object is used as
-            the query.
-        from_: Offset of the first hit to return.
-        size: Requested number of hits; capped at 50.
-
-    Returns:
-        ``str()`` of the aggregations (aggregation-only request), of a dict
-        with "hits" and "aggregations" (query plus aggregations), or of the
-        hits. The text is kept within ``max_tokens`` GPT-4 tokens: the request
-        is repeated with one hit fewer each time, and when asking for fewer
-        hits cannot help the text is truncated to the budget. On any error
-        the exception message is returned instead.
-    """
-    max_tokens = 6000  # token budget for the returned text
-    max_attempts = 100  # upper bound on the number of search requests
-    size = min(50, size)
-    encoding = tiktoken.encoding_for_model("gpt-4")
-    try:
-        full_dict: dict = json.loads(query)
-        query_dict = full_dict.get("query")
-        aggs_dict = full_dict.get("aggs")
-        sort_dict = full_dict.get("sort")
-        if query_dict is None and aggs_dict is None and sort_dict is None:
-            # Assume that there is a query but that the query part was omitted.
-            query_dict = full_dict
-        aggs_only = query_dict is None and aggs_dict is not None
-        if aggs_only:
-            # Aggregation-only request: the hits are never returned below.
-            # NOTE(review): the previous comment said the hits are suppressed
-            # here, but the requested size is 200 -- confirm the intended value.
-            size = 200
-        logger.info(query)
-        final_res = ""
-        tokens = []
-        for _ in range(max_attempts):
-            res = es.search(
-                index=index_name,
-                from_=from_,
-                size=size,
-                query=query_dict,
-                aggs=aggs_dict,
-                sort=sort_dict,
-            )
-            if aggs_only:
-                # When a result has only aggregations, return just those.
-                final_res = str(res["aggregations"])
-            elif aggs_dict is not None:
-                # Return both hits and aggregations.
-                final_res = str(
-                    {
-                        "hits": res.get("hits", {}),
-                        "aggregations": res.get("aggregations", {}),
-                    }
-                )
-            else:
-                final_res = str(res["hits"])
-            tokens = encoding.encode(final_res)
-            if len(tokens) <= max_tokens:
-                return final_res
-            if aggs_only or size <= 0:
-                # Requesting fewer hits cannot shrink this result any further:
-                # aggregations do not depend on ``size`` and there are no hits
-                # left to drop. Stop instead of repeating the same request or
-                # sending a negative size.
-                break
-            size -= 1
-        # Still over budget: return the leading part that fits rather than
-        # falling through and returning None.
-        return encoding.decode(tokens[:max_tokens])
-    except Exception as e:
-        logger.exception("Could not execute query %s", query)
-        return str(e)
-def create_search_tool(pcs_codes):
-    """Build the ``elastic_index_search_tool`` StructuredTool with ``pcs_codes`` bound.
-
-    Args:
-        pcs_codes: Value forwarded to ``elastic_search`` on every tool call.
-
-    Returns:
-        A StructuredTool whose arguments are validated against SearchToolInput
-        and whose result is whatever ``elastic_search`` returns.
-    """
-    def _search(index_name, query, from_=0, size=20):
-        # Forward the tool arguments to elastic_search, adding the bound pcs_codes.
-        return elastic_search(
-            pcs_codes=pcs_codes,
-            index_name=index_name,
-            query=query,
-            from_=from_,
-            size=size,
-        )
-    return StructuredTool.from_function(
-        func=_search,
-        name="elastic_index_search_tool",
-        # The description states that ``query`` is JSON text, because the
-        # argument schema declares it as a string and elastic_search parses it
-        # with json.loads. It also lists the required ``from_`` and ``size``.
-        description=(
-            """This tool allows executing queries on an Elasticsearch index efficiently. Provide:
-                        1. index_name (string): The target Elasticsearch index.
-                        2. query (string): A JSON-encoded request body that defines the query structure, supporting:
-                            a. Filters: For precise data retrieval (e.g., match, term, range).
-                            b. Aggregations: For statistical summaries and grouping (e.g., sum, average, histogram).
-                            c. Full-text search: For analyzing and ranking text-based results (e.g., match, multi-match, query_string).
-                        3. from_ (integer): The record index from which the query will start.
-                        4. size (integer): How many records to retrieve (at most 50).
-                        """
-        ),
-        args_schema=SearchToolInput,
-    )

ask_candid/tools/elastic/list_indices_tool.py DELETED Viewed

@@ -1,59 +0,0 @@
-from typing import Type, Optional, List
-import logging
-from pydantic import BaseModel, Field
-from elasticsearch import Elasticsearch
-from langchain.callbacks.manager import AsyncCallbackManagerForToolRun
-from langchain.tools.base import BaseTool
-from ask_candid.base.config.connections import SEMANTIC_ELASTIC_QA
-# Configure the root logger at import time and create this module's logger.
-logging.basicConfig(level="INFO")
-logger = logging.getLogger("elasticsearch_playground")
-# Module-level Elasticsearch client shared by the tool below. Connection
-# settings (cloud id and API key) come from SEMANTIC_ELASTIC_QA; TLS
-# certificates are verified.
-es = Elasticsearch(
-    cloud_id=SEMANTIC_ELASTIC_QA.cloud_id,
-    api_key=SEMANTIC_ELASTIC_QA.api_key,
-    verify_certs=True,
-    request_timeout=60 * 3  # 180 -- presumably seconds (3 minutes); confirm against client docs
-)
-class ListIndicesInput(BaseModel):
-    """Input for the list indices tool."""
-    # Required field (``...`` means no default): the string placed between
-    # index names in the tool's output.
-    separator: str = Field(..., description="Separator for the list of indices")
-class ListIndicesTool(BaseTool):
-    """Tool for getting all ElasticSearch indices."""
-    name: str = "elastic_list_indices"  # tool identifier
-    description: str = (
-        "Input is a delimiter like comma or new line. Output is a separated list of indices in the database. "
-        "Always use this tool to get to know the indices in the ElasticSearch cluster."
-    )
-    args_schema: Optional[Type[BaseModel]] = (
-        ListIndicesInput  # schema describing the arguments accepted by _run
-    )
-    def _run(self, separator: str) -> str:
-        """Return the names of all non-hidden indices joined by ``separator``.
-
-        Args:
-            separator: String placed between consecutive index names.
-
-        Returns:
-            The joined index names, or an empty string when the request fails
-            (the failure is logged).
-        """
-        try:
-            # Ask the cat API for the "index" column only, sorted by index
-            # name, then split the response on whitespace into names.
-            indices: List[str] = es.cat.indices(h="index", s="index").split()
-            # Filter out hidden indices starting with a dot
-            return separator.join(
-                [index for index in indices if not index.startswith(".")]
-            )
-        except Exception as e:
-            # Best-effort tool: log with traceback and return an empty result.
-            logger.exception("Could not list indices: %s", e)
-            return ""
-    async def _arun(
-        self,
-        separator: str = "",
-        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
-    ) -> str:
-        """Async entry point; always raises NotImplementedError."""
-        raise NotImplementedError("ListIndicesTool does not support async operations")