# NOTE(review): removed a page-scrape artifact ("Spaces: Runtime error" status
# banner) that was not part of this module's source.
| """Loading datasets and evaluators.""" | |
| from typing import Any, Dict, List, Optional, Sequence, Type, Union | |
| from langchain_core.language_models import BaseLanguageModel | |
| from langchain.chains.base import Chain | |
| from langchain.chat_models.openai import ChatOpenAI | |
| from langchain.evaluation.agents.trajectory_eval_chain import TrajectoryEvalChain | |
| from langchain.evaluation.comparison import PairwiseStringEvalChain | |
| from langchain.evaluation.comparison.eval_chain import LabeledPairwiseStringEvalChain | |
| from langchain.evaluation.criteria.eval_chain import ( | |
| CriteriaEvalChain, | |
| LabeledCriteriaEvalChain, | |
| ) | |
| from langchain.evaluation.embedding_distance.base import ( | |
| EmbeddingDistanceEvalChain, | |
| PairwiseEmbeddingDistanceEvalChain, | |
| ) | |
| from langchain.evaluation.exact_match.base import ExactMatchStringEvaluator | |
| from langchain.evaluation.parsing.base import ( | |
| JsonEqualityEvaluator, | |
| JsonValidityEvaluator, | |
| ) | |
| from langchain.evaluation.parsing.json_distance import JsonEditDistanceEvaluator | |
| from langchain.evaluation.parsing.json_schema import JsonSchemaEvaluator | |
| from langchain.evaluation.qa import ContextQAEvalChain, CotQAEvalChain, QAEvalChain | |
| from langchain.evaluation.regex_match.base import RegexMatchStringEvaluator | |
| from langchain.evaluation.schema import EvaluatorType, LLMEvalChain, StringEvaluator | |
| from langchain.evaluation.scoring.eval_chain import ( | |
| LabeledScoreStringEvalChain, | |
| ScoreStringEvalChain, | |
| ) | |
| from langchain.evaluation.string_distance.base import ( | |
| PairwiseStringDistanceEvalChain, | |
| StringDistanceEvalChain, | |
| ) | |
def load_dataset(uri: str) -> List[Dict]:
    """Load a dataset from the `LangChainDatasets on HuggingFace <https://huggingface.co/LangChainDatasets>`_.

    Args:
        uri: The uri of the dataset to load.

    Returns:
        A list of dictionaries, each representing a row in the dataset.

    **Prerequisites**

    .. code-block:: shell

        pip install datasets

    Examples
    --------
    .. code-block:: python

        from langchain.evaluation import load_dataset
        ds = load_dataset("llm-math")
    """  # noqa: E501
    try:
        # Alias the import so it does not shadow this function's own name.
        from datasets import load_dataset as hf_load_dataset
    except ImportError as e:
        # Chain the original ImportError so the real failure stays visible.
        raise ImportError(
            "load_dataset requires the `datasets` package."
            " Please install with `pip install datasets`"
        ) from e
    dataset = hf_load_dataset(f"LangChainDatasets/{uri}")
    # All LangChainDatasets publish their rows under the "train" split.
    return list(dataset["train"])
# Registry mapping each EvaluatorType to the class that implements it.
# Classes that subclass LLMEvalChain are constructed via `.from_llm(...)` in
# `load_evaluator`; all others are instantiated directly with `**kwargs`.
_EVALUATOR_MAP: Dict[
    EvaluatorType, Union[Type[LLMEvalChain], Type[Chain], Type[StringEvaluator]]
] = {
    EvaluatorType.QA: QAEvalChain,
    EvaluatorType.COT_QA: CotQAEvalChain,
    EvaluatorType.CONTEXT_QA: ContextQAEvalChain,
    EvaluatorType.PAIRWISE_STRING: PairwiseStringEvalChain,
    EvaluatorType.SCORE_STRING: ScoreStringEvalChain,
    EvaluatorType.LABELED_PAIRWISE_STRING: LabeledPairwiseStringEvalChain,
    EvaluatorType.LABELED_SCORE_STRING: LabeledScoreStringEvalChain,
    EvaluatorType.AGENT_TRAJECTORY: TrajectoryEvalChain,
    EvaluatorType.CRITERIA: CriteriaEvalChain,
    EvaluatorType.LABELED_CRITERIA: LabeledCriteriaEvalChain,
    EvaluatorType.STRING_DISTANCE: StringDistanceEvalChain,
    EvaluatorType.PAIRWISE_STRING_DISTANCE: PairwiseStringDistanceEvalChain,
    EvaluatorType.EMBEDDING_DISTANCE: EmbeddingDistanceEvalChain,
    EvaluatorType.PAIRWISE_EMBEDDING_DISTANCE: PairwiseEmbeddingDistanceEvalChain,
    EvaluatorType.JSON_VALIDITY: JsonValidityEvaluator,
    EvaluatorType.JSON_EQUALITY: JsonEqualityEvaluator,
    EvaluatorType.JSON_EDIT_DISTANCE: JsonEditDistanceEvaluator,
    EvaluatorType.JSON_SCHEMA_VALIDATION: JsonSchemaEvaluator,
    EvaluatorType.REGEX_MATCH: RegexMatchStringEvaluator,
    EvaluatorType.EXACT_MATCH: ExactMatchStringEvaluator,
}
def load_evaluator(
    evaluator: EvaluatorType,
    *,
    llm: Optional[BaseLanguageModel] = None,
    **kwargs: Any,
) -> Union[Chain, StringEvaluator]:
    """Load a single evaluator by its :class:`EvaluatorType`.

    Parameters
    ----------
    evaluator : EvaluatorType
        The type of evaluator to load.
    llm : BaseLanguageModel, optional
        Language model used by LLM-backed evaluators. When omitted, a default
        GPT-4 chat model is created on the fly.
    **kwargs : Any
        Extra keyword arguments forwarded to the evaluator's constructor.

    Returns
    -------
    Chain
        The loaded evaluation chain.

    Examples
    --------
    >>> from langchain.evaluation import load_evaluator, EvaluatorType
    >>> evaluator = load_evaluator(EvaluatorType.QA)
    """
    if evaluator not in _EVALUATOR_MAP:
        raise ValueError(
            f"Unknown evaluator type: {evaluator}"
            f"\nValid types are: {list(_EVALUATOR_MAP.keys())}"
        )
    selected_cls = _EVALUATOR_MAP[evaluator]

    # Non-LLM evaluators (string distance, regex, JSON, ...) need no model.
    if not issubclass(selected_cls, LLMEvalChain):
        return selected_cls(**kwargs)

    # LLM-backed evaluators: fall back to a deterministic GPT-4 model when no
    # model was supplied by the caller.
    try:
        eval_llm = llm or ChatOpenAI(
            model="gpt-4", model_kwargs={"seed": 42}, temperature=0
        )
    except Exception as err:
        raise ValueError(
            f"Evaluation with the {selected_cls} requires a "
            "language model to function."
            " Failed to create the default 'gpt-4' model."
            " Please manually provide an evaluation LLM"
            " or check your openai credentials."
        ) from err
    return selected_cls.from_llm(llm=eval_llm, **kwargs)
def load_evaluators(
    evaluators: Sequence[EvaluatorType],
    *,
    llm: Optional[BaseLanguageModel] = None,
    config: Optional[dict] = None,
    **kwargs: Any,
) -> List[Union[Chain, StringEvaluator]]:
    """Load several evaluators at once.

    Parameters
    ----------
    evaluators : Sequence[EvaluatorType]
        The list of evaluator types to load.
    llm : BaseLanguageModel, optional
        The language model to use for evaluation, if none is provided, a default
        ChatOpenAI gpt-4 model will be used.
    config : dict, optional
        Per-evaluator keyword arguments, keyed by evaluator type; entries here
        override the shared ``**kwargs`` for that evaluator. By default None.
    **kwargs : Any
        Keyword arguments applied to every evaluator.

    Returns
    -------
    List[Chain]
        The loaded evaluators.

    Examples
    --------
    >>> from langchain.evaluation import load_evaluators, EvaluatorType
    >>> evaluators = [EvaluatorType.QA, EvaluatorType.CRITERIA]
    >>> loaded_evaluators = load_evaluators(evaluators, criteria="helpfulness")
    """
    per_type_config = config or {}
    return [
        load_evaluator(
            evaluator_type,
            llm=llm,
            # Type-specific config takes precedence over the shared kwargs.
            **{**kwargs, **per_type_config.get(evaluator_type, {})},
        )
        for evaluator_type in evaluators
    ]