import pytest

from embedchain.config.evaluation.base import ContextRelevanceConfig
from embedchain.evaluation.metrics import ContextRelevance
from embedchain.utils.evaluation import EvalData, EvalMetric


@pytest.fixture
def mock_data():
    return [
        EvalData(
            contexts=[
                "This is a test context 1.",
            ],
            question="This is a test question 1.",
            answer="This is a test answer 1.",
        ),
        EvalData(
            contexts=[
                "This is a test context 2-1.",
                "This is a test context 2-2.",
            ],
            question="This is a test question 2.",
            answer="This is a test answer 2.",
        ),
    ]


@pytest.fixture
def mock_context_relevance_metric(monkeypatch):
    monkeypatch.setenv("OPENAI_API_KEY", "test_api_key")
    metric = ContextRelevance()
    return metric


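# A note on the defaults asserted in the init tests below (an observation drawn from these
# assertions, not a guarantee about the config API): ContextRelevanceConfig appears to default
# to model="gpt-4" and language="en", and config.api_key stays None when the key is supplied
# via the OPENAI_API_KEY environment variable rather than through the config object.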
def test_context_relevance_init(monkeypatch):
    monkeypatch.setenv("OPENAI_API_KEY", "test_api_key")
    metric = ContextRelevance()
    assert metric.name == EvalMetric.CONTEXT_RELEVANCY.value
    assert metric.config.model == "gpt-4"
    assert metric.config.api_key is None
    assert metric.config.language == "en"
    monkeypatch.delenv("OPENAI_API_KEY")


def test_context_relevance_init_with_config():
    metric = ContextRelevance(config=ContextRelevanceConfig(api_key="test_api_key"))
    assert metric.name == EvalMetric.CONTEXT_RELEVANCY.value
    assert metric.config.model == "gpt-4"
    assert metric.config.api_key == "test_api_key"
    assert metric.config.language == "en"


def test_context_relevance_init_without_api_key(monkeypatch):
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
    with pytest.raises(ValueError):
        ContextRelevance()


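# The expected segmentation in the next test keeps the trailing space on the first sentence.
# That is consistent with a segmenter that returns raw spans without cleaning whitespace
# (embedchain's metric is understood to use pysbd here; treat that detail as an assumption,
# since this test file does not state which segmenter backs _sentence_segmenter).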
def test_sentence_segmenter(mock_context_relevance_metric):
    text = "This is a test sentence. This is another sentence."
    assert mock_context_relevance_metric._sentence_segmenter(text) == [
        "This is a test sentence. ",
        "This is another sentence.",
    ]


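# In test_compute_score below, the OpenAI chat completion call is stubbed so that every request
# returns a single-sentence reply. The expected values are consistent with the score being the
# number of sentences in the model reply divided by the number of context sentences:
# 1/1 = 1.0 for the first sample and 1/2 = 0.5 for the second. That reading of _compute_score
# is inferred from these fixtures and assertions, not taken from the metric's source.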
def test_compute_score(mock_context_relevance_metric, mock_data, monkeypatch):
    # Stub the OpenAI client so no real API call is made; the fake completion always
    # contains exactly one sentence.
    monkeypatch.setattr(
        mock_context_relevance_metric.client.chat.completions,
        "create",
        lambda model, messages: type(
            "obj",
            (object,),
            {
                "choices": [
                    type("obj", (object,), {"message": type("obj", (object,), {"content": "This is a test response."})})
                ]
            },
        )(),
    )
    assert mock_context_relevance_metric._compute_score(mock_data[0]) == 1.0
    assert mock_context_relevance_metric._compute_score(mock_data[1]) == 0.5


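# test_evaluate below reuses the same single-sentence stub. Given the per-item scores of 1.0
# and 0.5 checked above, the expected 0.75 is their mean, which suggests (though this file
# alone does not prove) that evaluate() averages _compute_score over the dataset.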
def test_evaluate(mock_context_relevance_metric, mock_data, monkeypatch):
    # Same single-sentence completion stub as in test_compute_score.
    monkeypatch.setattr(
        mock_context_relevance_metric.client.chat.completions,
        "create",
        lambda model, messages: type(
            "obj",
            (object,),
            {
                "choices": [
                    type("obj", (object,), {"message": type("obj", (object,), {"content": "This is a test response."})})
                ]
            },
        )(),
    )
    assert mock_context_relevance_metric.evaluate(mock_data) == 0.75