# flake8: noqa
import os
import warnings
from typing import Any, Dict, List, Optional, Union

from langchain.callbacks.base import BaseCallbackHandler
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.outputs import LLMResult


class DeepEvalCallbackHandler(BaseCallbackHandler):
    """Callback Handler that logs into deepeval.

    Args:
        implementation_name: name of the `implementation` in deepeval
        metrics: A list of metrics

    Raises:
        ImportError: if the `deepeval` package is not installed.

    Examples:
        >>> from langchain.llms import OpenAI
        >>> from langchain.callbacks import DeepEvalCallbackHandler
        >>> from deepeval.metrics import AnswerRelevancy
        >>> metric = AnswerRelevancy(minimum_score=0.3)
        >>> deepeval_callback = DeepEvalCallbackHandler(
        ...     implementation_name="exampleImplementation",
        ...     metrics=[metric],
        ... )
        >>> llm = OpenAI(
        ...     temperature=0,
        ...     callbacks=[deepeval_callback],
        ...     verbose=True,
        ...     openai_api_key="API_KEY_HERE",
        ... )
        >>> llm.generate([
        ...     "What is the best evaluation tool out there? (no bias at all)",
        ... ])
        "Deepeval, no doubt about it."
    """

    REPO_URL: str = "https://github.com/confident-ai/deepeval"
    ISSUES_URL: str = f"{REPO_URL}/issues"
    BLOG_URL: str = "https://docs.confident-ai.com"  # noqa: E501

    def __init__(
        self,
        metrics: List[Any],
        implementation_name: Optional[str] = None,
    ) -> None:
        """Initializes the `DeepEvalCallbackHandler`.

        Args:
            metrics: A list of deepeval metrics to evaluate against.
            implementation_name: Name of the `implementation` in deepeval.

        Raises:
            ImportError: if the `deepeval` package is not installed.
            ConnectionError: if the connection to deepeval fails.
        """
        super().__init__()

        # Import deepeval directly (not via `import_deepeval`) to keep type
        # hints available in IDEs.
        try:
            import deepeval  # noqa: F401,I001
        except ImportError:
            raise ImportError(
                """To use the deepeval callback manager you need to have the
                `deepeval` Python package installed. Please install it with
                `pip install deepeval`"""
            )

        # Warn when no local `.deepeval` credentials file (created by
        # `deepeval login`) is found, since results will not reach the
        # dashboard in that case.
        if not os.path.exists(".deepeval"):
            warnings.warn(
                """You are currently not logging anything to the dashboard; we
                recommend using `deepeval login`."""
            )

        # Set the deepeval variables
        self.implementation_name = implementation_name
        self.metrics = metrics
        # Prompts are captured in `on_llm_start`; initialize the list here so
        # `on_llm_end` cannot hit an AttributeError if it fires first.
        self.prompts: List[str] = []

        warnings.warn(
            (
                "The `DeepEvalCallbackHandler` is currently in beta and is subject to"
                " change based on updates to `langchain`. Please report any issues to"
                f" {self.ISSUES_URL} as an `integration` issue."
            ),
        )

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Store the prompts so they can be paired with generations later."""
        self.prompts = prompts

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Do nothing when a new token is generated."""
        pass

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Log records to deepeval when an LLM ends."""
        from deepeval.metrics.answer_relevancy import AnswerRelevancy
        from deepeval.metrics.bias_classifier import UnBiasedMetric
        from deepeval.metrics.toxic_classifier import NonToxicMetric

        for metric in self.metrics:
            for i, generation in enumerate(response.generations):
                # Only the first generation for each prompt is measured.
                output = generation[0].text
                # `response.generations` follows the order of the prompts
                # stored in `on_llm_start`.
                query = self.prompts[i]
                if isinstance(metric, AnswerRelevancy):
                    result = metric.measure(
                        output=output,
                        query=query,
                    )
                    print(f"Answer Relevancy: {result}")
                elif isinstance(metric, UnBiasedMetric):
                    score = metric.measure(output)
                    print(f"Bias Score: {score}")
                elif isinstance(metric, NonToxicMetric):
                    score = metric.measure(output)
                    print(f"Toxic Score: {score}")
                else:
                    raise ValueError(
                        f"Metric {type(metric).__name__} is not supported by"
                        " deepeval callbacks."
                    )

    def on_llm_error(self, error: BaseException, **kwargs: Any) -> None:
        """Do nothing when LLM outputs an error."""
        pass

    def on_chain_start(
        self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> None:
        """Do nothing when chain starts."""
        pass

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
        """Do nothing when chain ends."""
        pass

    def on_chain_error(self, error: BaseException, **kwargs: Any) -> None:
        """Do nothing when LLM chain outputs an error."""
        pass

    def on_tool_start(
        self,
        serialized: Dict[str, Any],
        input_str: str,
        **kwargs: Any,
    ) -> None:
        """Do nothing when tool starts."""
        pass

    def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any:
        """Do nothing when agent takes a specific action."""
        pass

    def on_tool_end(
        self,
        output: str,
        observation_prefix: Optional[str] = None,
        llm_prefix: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Do nothing when tool ends."""
        pass

    def on_tool_error(self, error: BaseException, **kwargs: Any) -> None:
        """Do nothing when tool outputs an error."""
        pass

    def on_text(self, text: str, **kwargs: Any) -> None:
        """Do nothing when text is received."""
        pass

    def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> None:
        """Do nothing when agent finishes."""
        pass
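

# A minimal usage sketch, mirroring the class docstring example above. It
# assumes the legacy `deepeval.metrics.AnswerRelevancy` API shown there and an
# OpenAI key supplied via the placeholder below; treat it as an illustration,
# not a tested integration.
if __name__ == "__main__":
    from deepeval.metrics import AnswerRelevancy
    from langchain.llms import OpenAI

    metric = AnswerRelevancy(minimum_score=0.3)
    handler = DeepEvalCallbackHandler(
        implementation_name="exampleImplementation",
        metrics=[metric],
    )
    llm = OpenAI(
        temperature=0,
        callbacks=[handler],  # scores are printed from `on_llm_end`
        openai_api_key="API_KEY_HERE",  # placeholder, supply a real key
    )
    # `generate` fires `on_llm_start` (which stores the prompts) and then
    # `on_llm_end` (which runs each configured metric on the first generation).
    llm.generate(["What is the best evaluation tool out there?"])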