from langchain_openai import ChatOpenAI
from app.workflows.courses.suggest_expectations import SuggestExpectations
from langsmith.evaluation import LangChainStringEvaluator, evaluate
from langsmith.schemas import Example, Run
from typing import Any, Optional, TypedDict

database_name = "course-learn-suggest-expectations"
evaluator_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

class SingleEvaluatorInput(TypedDict):
    """The input to a `StringEvaluator`."""

    prediction: str
    """The prediction string."""
    reference: Optional[Any]
    """The reference string."""
    input: Optional[str]
    """The input string."""

def generate_expectations(example: dict):
    """Target function for evaluate(): run the SuggestExpectations chain on one dataset example."""
    chain = SuggestExpectations()._build_chain()
    response = chain.invoke({
        "course": example["course"],
        "module": example["module"],
        "tasks": example["tasks"],
        "format_instructions": example["format_instructions"],
        "existing_expectations": example["existing_expectations"],
    })
    return response

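# Note on shape: custom_evaluator below reads root_run.outputs["expectations"]
# and each item's "expectation" field, so the chain response is assumed to be a
# dict like {"expectations": [{"expectation": "..."}, ...]}; the exact schema is
# driven by SuggestExpectations and its format_instructions.
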
def similarity_search(org_str, test_strs):
    """Return the entry of test_strs whose embedding is closest to org_str."""
    most_similar = None
    min_distance = float('inf')
    similarity_qa_evaluator = LangChainStringEvaluator(
        "embedding_distance",
        config={"distance_metric": "cosine"},
    )
    for test_itr in test_strs:
        eval_inputs = SingleEvaluatorInput(
            prediction=org_str,
            reference=test_itr,
        )
        result = similarity_qa_evaluator.evaluator.evaluate_strings(**eval_inputs)
        similarity_distance = result['score']
        if abs(similarity_distance) < min_distance:
            # The evaluator reports a cosine *distance*; convert it to a
            # similarity so that a higher score means a closer match.
            result['score'] = 1 - similarity_distance
            most_similar = {"key": "similarity", **result,
                            "prediction": test_itr,
                            "reference": org_str}
            min_distance = abs(similarity_distance)
    return most_similar

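# Illustrative call (placeholder strings, not from the real dataset):
#   similarity_search(
#       "Students can explain how plants convert light into energy",
#       ["Learners describe photosynthesis", "Learners solve quadratic equations"],
#   )
# would return the feedback dict for the first candidate, with "score" holding
# 1 - cosine_distance (higher means a closer match).
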
def custom_evaluator(root_run: Run, example: Example) -> dict:
    """Score each generated expectation against the closest reference expectation."""
    results = []
    for output_expectation_obj in root_run.outputs['expectations']:
        output_expectation = output_expectation_obj['expectation']
        most_similar = similarity_search(
            output_expectation,
            [item["expectation"] for item in example.outputs["expectations"]]
        )
        results.append(most_similar)
    # Returning {"results": [...]} lets one evaluator report a separate feedback
    # entry per generated expectation.
    return {"results": results}

def build_evaluators():
    return evaluate(
        generate_expectations,
        data=database_name,
        evaluators=[custom_evaluator],
        experiment_prefix="alpha",
    )


build_evaluators()
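

# --- Dataset seeding sketch (never called here; shown only for context) ---
# evaluate() assumes a LangSmith dataset named `database_name` already exists.
# The helper below is a minimal, hypothetical sketch of how such a dataset could
# be seeded; the field values are placeholders, and the real input/output schema
# comes from the course data and SuggestExpectations.
def seed_example_dataset():
    from langsmith import Client

    client = Client()
    dataset = client.create_dataset(dataset_name=database_name)
    client.create_examples(
        inputs=[{
            "course": "Intro to Biology",                     # placeholder value
            "module": "Photosynthesis",                       # placeholder value
            "tasks": ["Read chapter 3", "Complete the lab"],  # placeholder value
            "format_instructions": "Return JSON with an 'expectations' list.",
            "existing_expectations": [],
        }],
        outputs=[{
            "expectations": [
                {"expectation": "Students can explain how plants convert light into energy."}
            ],
        }],
        dataset_id=dataset.id,
    )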