# setup / tests / til_test.py
# AnanthulaShravya's picture
# Rename til_test.py to tests/til_test.py
# df4b6f7 verified
import pytest
from growthy_agents.crew.til import TilCrew # type: ignore
# Parametrization data for the feedback test below.
#
# Each entry is a pair ``(input_text, expected_categorizations)`` where
# ``expected_categorizations`` is a list of dicts holding the expected
# 'Low' / 'Medium' / 'High' label for each of the four feedback dimensions.
# NOTE(review): presumably one expected dict per statement/bullet in the
# input text, in order -- confirm against TilCrew's feedback output.
examples = [
    (
        "The sun rises in the east.",
        [
            {
                "insightful_categorization": 'Low',
                "factuality_categorization": 'High',
                "simplicity_categorization": 'High',
                "grammatical_categorization": 'High',
            },
        ],
    ),
    (
        "* Quantization is the process of reducing the size of LLM models by reducing the underlying weights.\n"
        "* In quantization the weights are reduced by scaling up the datatypes from a datatype that takes smaller space to a data type that takes a larger space, this is also known as downcasting for example downcasting from int8 to float32.\n"
        "* Advantages: takes lesser space and increases compute speed.\n"
        "* Disadvantages: Answers are less precise because of the loss of precision in the LLM model weights.\n",
        [
            {
                # Was misspelled 'Meidum', which could never match a real label.
                "insightful_categorization": 'Medium',
                "factuality_categorization": 'High',
                "simplicity_categorization": 'High',
                "grammatical_categorization": 'High',
            },
            {
                # The second entry is the one expected to be rated 'Low' on
                # factuality; do not "correct" the input text above.
                "insightful_categorization": 'High',
                "factuality_categorization": 'Low',
                "simplicity_categorization": 'High',
                "grammatical_categorization": 'High',
            },
            {
                "insightful_categorization": 'High',
                "factuality_categorization": 'High',
                "simplicity_categorization": 'High',
                "grammatical_categorization": 'High',
            },
            {
                "insightful_categorization": 'High',
                "factuality_categorization": 'High',
                "simplicity_categorization": 'High',
                "grammatical_categorization": 'High',
            },
        ],
    ),
]
@pytest.mark.parametrize("input_text, expected_categorizations", examples)
def test_llm_evaluation(input_text, expected_categorizations):
    """Check that TilCrew's feedback labels match the expected labels.

    Sets ``input_text`` as the crew's ``content``, runs
    ``_gather_feedback()`` and compares every entry of
    ``feedback_results`` with the expected dict at the same position.

    Args:
        input_text: Text assigned to ``TilCrew.content``.
        expected_categorizations: Expected label dicts, one per feedback
            entry, in the order the crew is expected to return them.
    """
    categorization_keys = (
        "insightful_categorization",
        "factuality_categorization",
        "simplicity_categorization",
        "grammatical_categorization",
    )

    til_crew = TilCrew()
    til_crew.content = input_text
    til_crew._gather_feedback()
    feedback_items = list(til_crew.feedback_results)

    # Without this check an empty or truncated response would let the test
    # pass without comparing anything.
    assert len(feedback_items) == len(expected_categorizations), (
        f"expected {len(expected_categorizations)} feedback entries, "
        f"got {len(feedback_items)}"
    )

    for position, (feedback, expected) in enumerate(
        zip(feedback_items, expected_categorizations)
    ):
        for key in categorization_keys:
            # Labels are strings, so compare exactly: pytest.approx with an
            # absolute tolerance has no effect on non-numeric values.
            assert feedback[key] == expected[key], (
                f"entry {position}: {key} was {feedback[key]!r}, "
                f"expected {expected[key]!r}"
            )