ohollo's picture
Evaluate the whole graph instead
267e0c0
Raw
History Blame Contribute Delete
966 Bytes
"""Shared LangSmith experiment runner."""
from langsmith import Client
from langsmith import evaluate as ls_evaluate
from langsmith_evals.dataset import DATASET_NAME
from langsmith_evals.evaluators import evaluate_formats_tool_called_once
# Evaluators that run() falls back to when the caller does not supply any.
_DEFAULT_EVALUATORS = [evaluate_formats_tool_called_once]
def run(
    target,
    prefix: str,
    evaluators: list | None = None,
    dataset: str | None = None,
) -> None:
    """Launch a LangSmith experiment for ``target`` over a dataset.

    The call is delegated to ``langsmith.evaluate`` with a newly constructed
    ``Client``; whatever ``evaluate`` returns is discarded.

    :param target: Callable ``(inputs: dict) -> dict`` to evaluate.
    :param prefix: Passed through as the experiment name prefix.
    :param evaluators: Evaluator functions. Any falsy value (``None`` or an
        empty list) falls back to ``_DEFAULT_EVALUATORS``.
    :param dataset: Dataset name. Any falsy value (``None`` or ``""``) falls
        back to ``DATASET_NAME``.
    """
    # Resolve the fallbacks up front so the evaluate() call reads plainly.
    dataset_name = dataset if dataset else DATASET_NAME
    active_evaluators = evaluators if evaluators else _DEFAULT_EVALUATORS

    ls_evaluate(
        target,
        data=dataset_name,
        evaluators=active_evaluators,
        experiment_prefix=prefix,
        client=Client(),
    )