Spaces:

AnanthulaShravya
/

setup

Sleeping

App Files Files Community

setup / tests /til_test.py

AnanthulaShravya

Rename til_test.py to tests/til_test.py

df4b6f7 verified over 1 year ago

raw

history blame contribute delete

2.44 kB

	import pytest
	from growthy_agents.crew.til import TilCrew # type: ignore


	# Parametrized cases for test_llm_evaluation, as (input_text, expected) pairs.
	# `expected` is a list of categorization dicts that the test matches by
	# position against the feedback entries produced for `input_text`.
	# NOTE(review): the case sizes (1 sentence -> 1 dict, 4 bullets -> 4 dicts)
	# suggest one feedback entry per line/bullet of input -- confirm against TilCrew.
	examples = [
	    (
	        "The sun rises in the east.",
	        [
	            {
	                "insightful_categorization": "Low",
	                "factuality_categorization": "High",
	                "simplicity_categorization": "High",
	                "grammatical_categorization": "High",
	            },
	        ],
	    ),
	    (
	        "* Quantization is the process of reducing the size of LLM models by reducing the underlying weights.\n"
	        "* In quantization the weights are reduced by scaling up the datatypes from a datatype that takes smaller space to a data type that takes a larger space, this is also known as downcasting for example downcasting from int8 to float32.\n"
	        "* Advantages: takes lesser space and increases compute speed.\n"
	        "* Disadvantages: Answers are less precise because of the loss of precision in the LLM model weights.\n",
	        [
	            {
	                # Was misspelled 'Meidum'; the test compares labels by
	                # equality, so the typo could never match a real label.
	                "insightful_categorization": "Medium",
	                "factuality_categorization": "High",
	                "simplicity_categorization": "High",
	                "grammatical_categorization": "High",
	            },
	            {
	                # The second bullet is deliberately wrong (it describes
	                # scaling *up* and int8 -> float32), hence factuality 'Low'.
	                "insightful_categorization": "High",
	                "factuality_categorization": "Low",
	                "simplicity_categorization": "High",
	                "grammatical_categorization": "High",
	            },
	            {
	                "insightful_categorization": "High",
	                "factuality_categorization": "High",
	                "simplicity_categorization": "High",
	                "grammatical_categorization": "High",
	            },
	            {
	                "insightful_categorization": "High",
	                "factuality_categorization": "High",
	                "simplicity_categorization": "High",
	                "grammatical_categorization": "High",
	            },
	        ],
	    ),
	]


	@pytest.mark.parametrize("input_text, expected_categorizations", examples)
	def test_llm_evaluation(input_text, expected_categorizations):
	    """Check TilCrew's feedback categorizations against the expected labels.

	    Sets ``input_text`` as the crew's ``content``, calls ``_gather_feedback()``,
	    and compares every entry of ``feedback_results`` with the dict at the same
	    position in ``expected_categorizations``.

	    NOTE(review): judging by the test name, ``_gather_feedback`` presumably
	    calls an LLM, so results may vary between runs -- confirm.

	    Args:
	        input_text: Text assigned to ``TilCrew.content``.
	        expected_categorizations: One dict of expected labels per feedback
	            entry, in order.
	    """
	    categorization_keys = (
	        "insightful_categorization",
	        "factuality_categorization",
	        "simplicity_categorization",
	        "grammatical_categorization",
	    )

	    til_crew = TilCrew()
	    til_crew.content = input_text
	    til_crew._gather_feedback()
	    feedback_items = list(til_crew.feedback_results)

	    # Without this check an empty or short result would pass vacuously, and a
	    # longer one would fail with a bare IndexError instead of a clear message.
	    assert len(feedback_items) == len(expected_categorizations), (
	        f"expected {len(expected_categorizations)} feedback entries, "
	        f"got {len(feedback_items)}"
	    )

	    for idx, (feedback, expected) in enumerate(
	        zip(feedback_items, expected_categorizations)
	    ):
	        for key in categorization_keys:
	            # The labels are strings, so plain equality is the right check;
	            # pytest.approx(..., abs=2.0) only applies a tolerance to numbers
	            # and compared these values by strict equality anyway.
	            assert feedback[key] == expected[key], (
	                f"feedback[{idx}][{key!r}]: "
	                f"expected {expected[key]!r}, got {feedback[key]!r}"
	            )