Spaces:
Sleeping
Sleeping
File size: 2,443 Bytes
8c27dd8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 | import pytest
from growthy_agents.crew.til import TilCrew # type: ignore
# Parametrization data for test_llm_evaluation.
# Each entry is (input_text, expected_categorizations), where
# expected_categorizations is a list of dicts mapping every
# "*_categorization" key to its expected label.
# NOTE(review): the second input has four bullet lines and four expected
# dicts — presumably one feedback item per bullet; confirm against TilCrew.
examples = [
    (
        "The sun rises in the east.",
        [
            {
                "insightful_categorization": 'Low',
                "factuality_categorization": 'High',
                "simplicity_categorization": 'High',
                "grammatical_categorization": 'High',
            },
        ],
    ),
    (
        "* Quantization is the process of reducing the size of LLM models by reducing the underlying weights.\n"
        "* In quantization the weights are reduced by scaling up the datatypes from a datatype that takes smaller space to a data type that takes a larger space, this is also known as downcasting for example downcasting from int8 to float32.\n"
        "* Advantages: takes lesser space and increases compute speed.\n"
        "* Disadvantages: Answers are less precise because of the loss of precision in the LLM model weights.\n",
        [
            {
                # Was misspelled 'Meidum', which could never equal a
                # correctly spelled label.
                "insightful_categorization": 'Medium',
                "factuality_categorization": 'High',
                "simplicity_categorization": 'High',
                "grammatical_categorization": 'High',
            },
            {
                "insightful_categorization": 'High',
                "factuality_categorization": 'Low',
                "simplicity_categorization": 'High',
                "grammatical_categorization": 'High',
            },
            {
                "insightful_categorization": 'High',
                "factuality_categorization": 'High',
                "simplicity_categorization": 'High',
                "grammatical_categorization": 'High',
            },
            {
                "insightful_categorization": 'High',
                "factuality_categorization": 'High',
                "simplicity_categorization": 'High',
                "grammatical_categorization": 'High',
            },
        ],
    ),
]
@pytest.mark.parametrize("input_text, expected_categorizations", examples)
def test_llm_evaluation(input_text, expected_categorizations):
    """Check that TilCrew's feedback labels match the expected categorizations.

    Args:
        input_text: Text assigned to ``TilCrew.content`` before feedback is
            gathered.
        expected_categorizations: List of dicts, one per expected feedback
            item, mapping each ``*_categorization`` key to its expected label.
    """
    til_crew = TilCrew()
    til_crew.content = input_text
    til_crew._gather_feedback()
    feedback_items = list(til_crew.feedback_results)

    # Without this check an empty or short response would skip comparisons
    # and let the test pass vacuously; a longer one would raise IndexError.
    assert len(feedback_items) == len(expected_categorizations), (
        f"expected {len(expected_categorizations)} feedback item(s), "
        f"got {len(feedback_items)}"
    )

    categorization_keys = (
        "insightful_categorization",
        "factuality_categorization",
        "simplicity_categorization",
        "grammatical_categorization",
    )
    for idx, (feedback, expected) in enumerate(
        zip(feedback_items, expected_categorizations)
    ):
        for key in categorization_keys:
            # Labels are strings, so compare with plain equality:
            # pytest.approx(..., abs=2.0) has no tolerance semantics for
            # non-numeric values and only obscured the strict comparison.
            assert feedback[key] == expected[key], (
                f"feedback item {idx}: {key} is {feedback[key]!r}, "
                f"expected {expected[key]!r}"
            )
|