|
|
from langchain_community.vectorstores import Clickhouse, ClickhouseSettings |
|
|
|
|
|
from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store |
|
|
from langflow.helpers.data import docs_to_data |
|
|
from langflow.inputs import BoolInput, FloatInput |
|
|
from langflow.io import ( |
|
|
DataInput, |
|
|
DictInput, |
|
|
DropdownInput, |
|
|
HandleInput, |
|
|
IntInput, |
|
|
MultilineInput, |
|
|
SecretStrInput, |
|
|
StrInput, |
|
|
) |
|
|
from langflow.schema import Data |
|
|
|
|
|
|
|
|
class ClickhouseVectorStoreComponent(LCVectorStoreComponent):
    """Langflow component that builds and queries a LangChain ``Clickhouse`` vector store.

    ``build_vector_store`` probes the server with ``clickhouse_connect``, converts any
    ingest data into LangChain documents and returns a ``Clickhouse`` store configured
    from the component inputs. ``search_documents`` runs a similarity search on that
    store using the ``search_query`` input.
    """

    display_name = "Clickhouse"
    description = "Clickhouse Vector Store with search capabilities"
    documentation = "https://python.langchain.com/v0.2/docs/integrations/vectorstores/clickhouse/"
    name = "Clickhouse"
    icon = "Clickhouse"

    # Each input is read back in the methods below as ``self.<name>``.
    inputs = [
        StrInput(name="host", display_name="hostname", required=True, value="localhost"),
        IntInput(name="port", display_name="port", required=True, value=8123),
        StrInput(name="database", display_name="database", required=True),
        StrInput(name="table", display_name="Table name", required=True),
        StrInput(name="username", display_name="The ClickHouse user name.", required=True),
        SecretStrInput(name="password", display_name="The password for username.", required=True),
        DropdownInput(
            name="index_type",
            display_name="index_type",
            options=["annoy", "vector_similarity"],
            info="Type of the index.",
            value="annoy",
            advanced=True,
        ),
        DropdownInput(
            name="metric",
            display_name="metric",
            options=["angular", "euclidean", "manhattan", "hamming", "dot"],
            info="Metric to compute distance.",
            value="angular",
            advanced=True,
        ),
        BoolInput(
            name="secure",
            display_name="Use https/TLS. This overrides inferred values from the interface or port arguments.",
            value=False,
            advanced=True,
        ),
        StrInput(name="index_param", display_name="Param of the index", value="'L2Distance',100", advanced=True),
        DictInput(name="index_query_params", display_name="index query params", advanced=True),
        MultilineInput(name="search_query", display_name="Search Query"),
        DataInput(name="ingest_data", display_name="Ingest Data", is_list=True),
        HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]),
        IntInput(
            name="number_of_results",
            display_name="Number of Results",
            info="Number of results to return.",
            value=4,
            advanced=True,
        ),
        FloatInput(name="score_threshold", display_name="Score threshold", advanced=True),
    ]

    @check_cached_vector_store
    def build_vector_store(self) -> Clickhouse:
        """Check connectivity and build the ``Clickhouse`` vector store.

        Returns:
            A ``Clickhouse`` store. It is created with ``Clickhouse.from_documents``
            when ingest data was supplied, and with the plain constructor otherwise.

        Raises:
            ImportError: If ``clickhouse_connect`` cannot be imported.
            ValueError: If the connectivity probe (``SELECT 1``) fails.
        """
        try:
            import clickhouse_connect
        except ImportError as e:
            msg = (
                "Failed to import Clickhouse dependencies. "
                "Install it using `pip install langflow[clickhouse-connect] --pre`"
            )
            raise ImportError(msg) from e

        try:
            # Probe the same endpoint the vector store will use: forward the
            # configured port and TLS flag instead of relying on driver defaults.
            client = clickhouse_connect.get_client(
                host=self.host,
                port=self.port,
                username=self.username,
                password=self.password,
                secure=self.secure,
            )
            try:
                client.command("SELECT 1")
            finally:
                # The probe client is only needed for this check; release it.
                client.close()
        except Exception as e:
            msg = f"Failed to connect to Clickhouse: {e}"
            raise ValueError(msg) from e

        # ``Data`` items are converted to LangChain documents; anything else is
        # passed through unchanged.
        documents = [
            item.to_lc_document() if isinstance(item, Data) else item
            for item in self.ingest_data or []
        ]

        # Only forward the optional index settings when the user provided them.
        optional_settings = {}
        if self.index_param:
            optional_settings["index_param"] = self.index_param.split(",")
        if self.index_query_params:
            optional_settings["index_query_params"] = self.index_query_params

        settings = ClickhouseSettings(
            table=self.table,
            database=self.database,
            host=self.host,
            index_type=self.index_type,
            metric=self.metric,
            password=self.password,
            port=self.port,
            secure=self.secure,
            username=self.username,
            **optional_settings,
        )

        if documents:
            return Clickhouse.from_documents(documents=documents, embedding=self.embedding, config=settings)
        return Clickhouse(embedding=self.embedding, config=settings)

    def search_documents(self) -> list[Data]:
        """Run a similarity search for ``search_query`` against the vector store.

        The vector store is always built first, even when no query is given.

        Returns:
            The matching documents converted with ``docs_to_data``, or an empty
            list when ``search_query`` is missing, not a string, or blank.
        """
        vector_store = self.build_vector_store()

        query = self.search_query
        if not (query and isinstance(query, str) and query.strip()):
            return []

        search_kwargs = {}
        if self.score_threshold:
            search_kwargs["score_threshold"] = self.score_threshold

        docs = vector_store.similarity_search(query=query, k=self.number_of_results, **search_kwargs)

        data = docs_to_data(docs)
        self.status = data
        return data
|
|
|