Spaces:
Sleeping
Sleeping
| import pytest | |
| from growthy_agents.crew.til import TilCrew # type: ignore | |
# Evaluation cases for the TIL feedback crew.
#
# Each entry is ``(input_text, expected_categorizations)``:
#   * ``input_text`` is the raw TIL content handed to the crew.
#   * ``expected_categorizations`` lists one dict of expected labels per
#     feedback item, in order. Labels used here are 'Low', 'Medium', 'High'.
#
# NOTE: the second quantization bullet is intentionally wrong (it describes
# scaling *up* and "downcasting from int8 to float32"); that is why its
# expected factuality is 'Low'. Do not "correct" the input text.
examples = [
    (
        "The sun rises in the east.",
        [
            {
                "insightful_categorization": 'Low',
                "factuality_categorization": 'High',
                "simplicity_categorization": 'High',
                "grammatical_categorization": 'High',
            },
        ],
    ),
    (
        "* Quantization is the process of reducing the size of LLM models by reducing the underlying weights.\n"
        "* In quantization the weights are reduced by scaling up the datatypes from a datatype that takes smaller space to a data type that takes a larger space, this is also known as downcasting for example downcasting from int8 to float32.\n"
        "* Advantages: takes lesser space and increases compute speed.\n"
        "* Disadvantages: Answers are less precise because of the loss of precision in the LLM model weights.\n",
        [
            {
                # Was misspelled 'Meidum', which can never equal a real label.
                "insightful_categorization": 'Medium',
                "factuality_categorization": 'High',
                "simplicity_categorization": 'High',
                "grammatical_categorization": 'High',
            },
            {
                "insightful_categorization": 'High',
                "factuality_categorization": 'Low',
                "simplicity_categorization": 'High',
                "grammatical_categorization": 'High',
            },
            {
                "insightful_categorization": 'High',
                "factuality_categorization": 'High',
                "simplicity_categorization": 'High',
                "grammatical_categorization": 'High',
            },
            {
                "insightful_categorization": 'High',
                "factuality_categorization": 'High',
                "simplicity_categorization": 'High',
                "grammatical_categorization": 'High',
            },
        ],
    ),
]
@pytest.mark.parametrize(("input_text", "expected_categorizations"), examples)
def test_llm_evaluation(input_text, expected_categorizations):
    """Check that the crew's feedback labels match the expected ones.

    Runs once per entry in ``examples``.

    Args:
        input_text: TIL content assigned to ``TilCrew.content``.
        expected_categorizations: Expected label dicts, one per feedback
            item, in the order the crew is expected to return them.
    """
    til_crew = TilCrew()
    til_crew.content = input_text
    # Private hook; the results are read from ``feedback_results`` afterwards.
    til_crew._gather_feedback()
    feedback_results = list(til_crew.feedback_results)

    # Without this check an empty or short response would pass vacuously and
    # a long one would fail with an opaque IndexError.
    assert len(feedback_results) == len(expected_categorizations), (
        f"expected {len(expected_categorizations)} feedback item(s), "
        f"got {len(feedback_results)}"
    )

    for idx, (feedback, expected) in enumerate(
        zip(feedback_results, expected_categorizations)
    ):
        for key, expected_label in expected.items():
            # Labels are strings, so compare exactly: pytest.approx has no
            # meaningful tolerance for non-numeric values.
            assert feedback[key] == expected_label, (
                f"feedback #{idx}: {key} is {feedback[key]!r}, "
                f"expected {expected_label!r}"
            )