"""Evaluation tests for the feedback categorizations produced by ``TilCrew``."""

import pytest
from growthy_agents.crew.til import TilCrew  # type: ignore

# Feedback fields that are compared for every evaluated item.
_CATEGORY_KEYS = (
    "insightful_categorization",
    "factuality_categorization",
    "simplicity_categorization",
    "grammatical_categorization",
)

# Each example is (input_text, expected_categorizations). The expected list
# holds one dict per line of the input text, in the same order.
examples = [
    (
        "The sun rises in the east.",
        [
            {
                "insightful_categorization": "Low",
                "factuality_categorization": "High",
                "simplicity_categorization": "High",
                "grammatical_categorization": "High",
            },
        ],
    ),
    (
        "* Quantization is the process of reducing the size of LLM models by reducing the underlying weights.\n"
        "* In quantization the weights are reduced by scaling up the datatypes from a datatype that takes smaller space to a data type that takes a larger space, this is also known as downcasting for example downcasting from int8 to float32.\n"
        "* Advantages: takes lesser space and increases compute speed.\n"
        "* Disadvantages: Answers are less precise because of the loss of precision in the LLM model weights.\n",
        [
            {
                "insightful_categorization": "Medium",
                "factuality_categorization": "High",
                "simplicity_categorization": "High",
                "grammatical_categorization": "High",
            },
            {
                "insightful_categorization": "High",
                "factuality_categorization": "Low",
                "simplicity_categorization": "High",
                "grammatical_categorization": "High",
            },
            {
                "insightful_categorization": "High",
                "factuality_categorization": "High",
                "simplicity_categorization": "High",
                "grammatical_categorization": "High",
            },
            {
                "insightful_categorization": "High",
                "factuality_categorization": "High",
                "simplicity_categorization": "High",
                "grammatical_categorization": "High",
            },
        ],
    ),
]


@pytest.mark.parametrize("input_text, expected_categorizations", examples)
def test_llm_evaluation(input_text, expected_categorizations):
    """Check the crew's feedback categories against the expected labels.

    Sets ``input_text`` as the crew's ``content``, calls
    ``_gather_feedback()`` and compares every entry of ``feedback_results``
    with the expected dict at the same position.

    Args:
        input_text: The text handed to the crew for evaluation.
        expected_categorizations: List of dicts holding the expected label
            for each key in ``_CATEGORY_KEYS``.
    """
    til_crew = TilCrew()
    til_crew.content = input_text
    # NOTE(review): presumably this calls out to an LLM, so results may vary
    # between runs -- confirm against TilCrew.
    til_crew._gather_feedback()
    response = til_crew.feedback_results

    # Without this check an empty or truncated result would pass vacuously,
    # and an over-long one would fail with an unhelpful IndexError.
    assert len(response) == len(expected_categorizations), (
        f"expected {len(expected_categorizations)} feedback entries, "
        f"got {len(response)}"
    )

    for idx, (feedback, expected) in enumerate(
        zip(response, expected_categorizations)
    ):
        for key in _CATEGORY_KEYS:
            # The labels are strings, so they are compared exactly;
            # pytest.approx applies tolerances to numeric values only.
            assert feedback[key] == expected[key], (
                f"item {idx}: {key} is {feedback[key]!r}, "
                f"expected {expected[key]!r}"
            )