Spaces:
Running
Running
| """Shared LangSmith experiment runner.""" | |
| from langsmith import Client | |
| from langsmith import evaluate as ls_evaluate | |
| from langsmith_evals.dataset import DATASET_NAME | |
| from langsmith_evals.evaluators import evaluate_formats_tool_called_once | |
# Evaluators applied by ``run`` when the caller does not supply any.
_DEFAULT_EVALUATORS = [
    evaluate_formats_tool_called_once,
]
def run(target, prefix: str, evaluators: list | None = None, dataset: str | None = None) -> None:
    """Launch a LangSmith experiment for ``target`` over a dataset.

    The call is delegated to ``langsmith.evaluate`` with a freshly created
    ``Client``; whatever ``evaluate`` returns is discarded.

    :param target: Callable ``(inputs: dict) -> dict`` to evaluate.
    :param prefix: Experiment name prefix, forwarded as ``experiment_prefix``.
    :param evaluators: Evaluator functions. When ``None`` or empty, the
        module-level ``_DEFAULT_EVALUATORS`` list is used instead.
    :param dataset: Dataset name. When ``None`` or empty, ``DATASET_NAME``
        from ``langsmith_evals.dataset`` is used instead.
    """
    # Any falsy value (None, "", []) selects the default, not only None.
    dataset_name = dataset if dataset else DATASET_NAME
    active_evaluators = evaluators if evaluators else _DEFAULT_EVALUATORS

    ls_evaluate(
        target,
        data=dataset_name,
        evaluators=active_evaluators,
        experiment_prefix=prefix,
        client=Client(),
    )