agent-flow / src /backend /base /langflow /components /embeddings /google_generative_ai.py

Upload folder using huggingface_hub

4b0794d verified about 1 year ago

5.53 kB

	# from langflow.field_typing import Data
	import numpy as np

	# TODO: remove ignore once the google package is published with types
	from google.ai.generativelanguage_v1beta.types import BatchEmbedContentsRequest
	from langchain_core.embeddings import Embeddings
	from langchain_google_genai import GoogleGenerativeAIEmbeddings
	from langchain_google_genai._common import GoogleGenerativeAIError

	from langflow.custom import Component
	from langflow.io import MessageTextInput, Output, SecretStrInput


	class GoogleGenerativeAIEmbeddingsComponent(Component):
	display_name = "Google Generative AI Embeddings"
	description = (
	"Connect to Google's generative AI embeddings service using the GoogleGenerativeAIEmbeddings class, "
	"found in the langchain-google-genai package."
	)
	documentation: str = "https://python.langchain.com/v0.2/docs/integrations/text_embedding/google_generative_ai/"
	icon = "Google"
	name = "Google Generative AI Embeddings"

	inputs = [
	SecretStrInput(name="api_key", display_name="API Key"),
	MessageTextInput(name="model_name", display_name="Model Name", value="models/text-embedding-004"),
	]

	outputs = [
	Output(display_name="Embeddings", name="embeddings", method="build_embeddings"),
	]

	def build_embeddings(self) -> Embeddings:
	if not self.api_key:
	msg = "API Key is required"
	raise ValueError(msg)

	class HotaGoogleGenerativeAIEmbeddings(GoogleGenerativeAIEmbeddings):
	def __init__(self, args, *kwargs) -> None:
	super(GoogleGenerativeAIEmbeddings, self).__init__(args, *kwargs)

	def embed_documents(
	self,
	texts: list[str],
	*,
	batch_size: int = 100,
	task_type: str \| None = None,
	titles: list[str] \| None = None,
	output_dimensionality: int \| None = 1536,
	) -> list[list[float]]:
	"""Embed a list of strings.

	Google Generative AI currently sets a max batch size of 100 strings.

	Args:
	texts: List[str] The list of strings to embed.
	batch_size: [int] The batch size of embeddings to send to the model
	task_type: task_type (https://ai.google.dev/api/rest/v1/TaskType)
	titles: An optional list of titles for texts provided.
	Only applicable when TaskType is RETRIEVAL_DOCUMENT.
	output_dimensionality: Optional reduced dimension for the output embedding.
	https://ai.google.dev/api/rest/v1/models/batchEmbedContents#EmbedContentRequest
	Returns:
	List of embeddings, one for each text.
	"""
	embeddings: list[list[float]] = []
	batch_start_index = 0
	for batch in GoogleGenerativeAIEmbeddings._prepare_batches(texts, batch_size):
	if titles:
	titles_batch = titles[batch_start_index : batch_start_index + len(batch)]
	batch_start_index += len(batch)
	else:
	titles_batch = [None] * len(batch) # type: ignore[list-item]

	requests = [
	self._prepare_request(
	text=text,
	task_type=task_type,
	title=title,
	output_dimensionality=output_dimensionality,
	)
	for text, title in zip(batch, titles_batch, strict=True)
	]

	try:
	result = self.client.batch_embed_contents(
	BatchEmbedContentsRequest(requests=requests, model=self.model)
	)
	except Exception as e:
	msg = f"Error embedding content: {e}"
	raise GoogleGenerativeAIError(msg) from e
	embeddings.extend([list(np.pad(e.values, (0, 768), "constant")) for e in result.embeddings])
	return embeddings

	def embed_query(
	self,
	text: str,
	task_type: str \| None = None,
	title: str \| None = None,
	output_dimensionality: int \| None = 1536,
	) -> list[float]:
	"""Embed a text.

	Args:
	text: The text to embed.
	task_type: task_type (https://ai.google.dev/api/rest/v1/TaskType)
	title: An optional title for the text.
	Only applicable when TaskType is RETRIEVAL_DOCUMENT.
	output_dimensionality: Optional reduced dimension for the output embedding.
	https://ai.google.dev/api/rest/v1/models/batchEmbedContents#EmbedContentRequest

	Returns:
	Embedding for the text.
	"""
	task_type = task_type or "RETRIEVAL_QUERY"
	return self.embed_documents(
	[text],
	task_type=task_type,
	titles=[title] if title else None,
	output_dimensionality=output_dimensionality,
	)[0]

	return HotaGoogleGenerativeAIEmbeddings(model=self.model_name, google_api_key=self.api_key)