RippleGPT-Nano / validation / code / test_cases.py

Upload folder using huggingface_hub

148b631 verified 28 days ago

9.86 kB

	"""
	test_cases.py - Test cases for code completion validation.

	Defines specific tests to evaluate if RippleGPT understands
	hierarchical code structures.
	"""

	from dataclasses import dataclass
	from typing import List, Callable, Optional
	import re


	@dataclass
	class TestCase:
	    """Represents a code completion test case.

	    Each instance bundles a prompt with the regex checks applied to the
	    text produced for that prompt. Omitting ``forbidden_patterns`` (or
	    passing ``None``) yields a fresh empty list per instance, so instances
	    never share one mutable default.
	    """

	    # This is a data container, not a pytest test class. Without this flag
	    # pytest's default discovery would try to collect any ``Test*`` class in
	    # a ``test_*.py`` module and emit a collection warning for it.
	    __test__ = False

	    name: str  # Identifier shown when the tests are listed
	    category: str  # Grouping key matched by get_tests_by_category()
	    prompt: str  # Code fragment to be completed
	    expected_patterns: List[str]  # Regex patterns that MUST appear in output
	    forbidden_patterns: Optional[List[str]] = None  # Patterns that MUST NOT appear
	    max_tokens: int = 50  # presumably the generation budget - confirm with the runner
	    description: str = ""  # Human-readable summary of what is checked

	    def __post_init__(self):
	        """Replace a missing ``forbidden_patterns`` with a new empty list."""
	        if self.forbidden_patterns is None:
	            self.forbidden_patterns = []


	# =============================================================================
	# CATEGORY 1: BRACKET CLOSING
	# Tests if the model can close parentheses, braces, and brackets
	# =============================================================================

	BRACKET_TESTS = [
	    # Every bracket test shares the same category and token budget, so only
	    # the varying fields are listed in the table below.
	    TestCase(
	        name=case_name,
	        category="brackets",
	        prompt=case_prompt,
	        expected_patterns=case_patterns,
	        max_tokens=20,
	        description=case_description,
	    )
	    for case_name, case_prompt, case_patterns, case_description in (
	        # (name, prompt, required regexes, description)
	        (
	            "simple_parenthesis",
	            "def hello(name",
	            [r"\)"],  # Should close parenthesis
	            "Should close simple function parenthesis",
	        ),
	        (
	            "multiple_args",
	            "def calculate(a, b, c",
	            [r"\)", r":"],  # Should close and add ':'
	            "Should close parenthesis with multiple arguments",
	        ),
	        (
	            "nested_parenthesis",
	            "result = sum(range(10",
	            [r"\)\)"],  # Should close both
	            "Should close nested parentheses",
	        ),
	        (
	            "list_bracket",
	            "items = [1, 2, 3",
	            [r"\]"],
	            "Should close list bracket",
	        ),
	        (
	            "dict_brace",
	            'data = {"name": "test"',
	            [r"\}"],
	            "Should close dictionary brace",
	        ),
	        (
	            "function_call_chain",
	            "text.strip().lower(",
	            [r"\)"],
	            "Should close parenthesis in method chain",
	        ),
	    )
	]

	# =============================================================================
	# CATEGORY 2: PYTHON INDENTATION
	# Tests if the model maintains correct indentation after blocks
	# =============================================================================

	# Prompts end with a newline so the first thing to be produced is the
	# indentation of the new block's first line.
	# NOTE(review): patterns without a stated width below only require a single
	# space; tighten them if 4-space indentation should be enforced everywhere.
	INDENTATION_TESTS = [
	    TestCase(
	        name="if_indent",
	        category="indentation",
	        prompt="if x > 0:\n",
	        # A single alternation: either indentation style satisfies the check.
	        # Listed as two separate required patterns, both would have to match.
	        expected_patterns=[r"^(?:    |\t)\S"],  # Should indent 4 spaces or tab
	        max_tokens=30,
	        description="Should indent after if statement",
	    ),
	    TestCase(
	        name="for_indent",
	        category="indentation",
	        prompt="for i in range(10):\n",
	        expected_patterns=[r" \S"],
	        max_tokens=30,
	        description="Should indent after for loop",
	    ),
	    TestCase(
	        name="def_indent",
	        category="indentation",
	        prompt="def process(data):\n",
	        expected_patterns=[r" "],
	        max_tokens=30,
	        description="Should indent function body",
	    ),
	    TestCase(
	        name="class_indent",
	        category="indentation",
	        prompt="class MyClass:\n",
	        expected_patterns=[r" "],
	        max_tokens=30,
	        description="Should indent class body",
	    ),
	    TestCase(
	        name="nested_indent",
	        category="indentation",
	        # The inner block sits one 4-space level deep, so its body needs two.
	        prompt="def foo():\n    if True:\n",
	        expected_patterns=[r"        \S"],  # 8 spaces (double indentation)
	        max_tokens=30,
	        description="Should maintain nested indentation",
	    ),
	    TestCase(
	        name="try_except_indent",
	        category="indentation",
	        prompt="try:\n x = 1\nexcept:\n",
	        expected_patterns=[r" "],
	        max_tokens=30,
	        description="Should indent except block",
	    ),
	]

	# =============================================================================
	# CATEGORY 3: CODE STRUCTURE
	# Tests if the model understands common code patterns
	# =============================================================================

	# Each case gives the start of a common construct and lists regexes the
	# produced continuation is expected to contain.
	STRUCTURE_TESTS = [
	    TestCase(
	        name="return_statement",
	        category="structure",
	        prompt="def add(a, b):\n return a",
	        # NOTE(review): the second pattern includes the "a" that is already
	        # part of the prompt - confirm whether matching runs on prompt + output.
	        expected_patterns=[r"\+\s*b", r"a \+ b"],
	        max_tokens=20,
	        description="Should complete addition operation",
	    ),
	    TestCase(
	        name="for_loop_pattern",
	        category="structure",
	        prompt="for i in range(",
	        expected_patterns=[r"\d+\)"],  # Number followed by )
	        max_tokens=20,
	        description="Should complete range() with number",
	    ),
	    TestCase(
	        name="import_statement",
	        category="structure",
	        prompt="import os\nimport sys\nimport ",
	        expected_patterns=[r"[a-z]+"],  # Module name
	        forbidden_patterns=[r"^\d"],  # Must not start with digit
	        max_tokens=20,
	        description="Should suggest valid module name",
	    ),
	    TestCase(
	        name="list_comprehension",
	        category="structure",
	        prompt="squares = [x**2 for x in ",
	        # Unescaped "|" is alternation; "\|" would only match a literal pipe
	        # character, which never occurs in a valid completion.
	        expected_patterns=[r"range\(|list\(|\["],
	        max_tokens=30,
	        description="Should complete list comprehension",
	    ),
	    TestCase(
	        name="method_definition",
	        category="structure",
	        prompt="class Dog:\n def __init__(self",
	        expected_patterns=[r"\)", r":"],
	        max_tokens=30,
	        description="Should complete __init__ definition",
	    ),
	    TestCase(
	        name="conditional_else",
	        category="structure",
	        prompt="if condition:\n do_something()\nelse",
	        expected_patterns=[r":"],
	        max_tokens=20,
	        description="Should add ':' after else",
	    ),
	]

	# =============================================================================
	# CATEGORY 4: LONG CONTEXT
	# Tests if the model maintains coherence in longer code
	# =============================================================================

	# Prompts are assembled from one-line literals so the indentation inside
	# each snippet is explicit and does not depend on this file's own layout.
	LONG_CONTEXT_TESTS = [
	    TestCase(
	        name="function_body",
	        category="long_context",
	        prompt=(
	            "def calculate_average(numbers):\n"
	            "    if not numbers:\n"
	            "        return 0\n"
	            "    total = 0\n"
	            "    for num in numbers:\n"
	            "        total +="
	        ),
	        expected_patterns=[r"num"],  # Should use loop variable
	        max_tokens=20,
	        description="Should recall loop variable",
	    ),
	    TestCase(
	        name="class_method_reference",
	        category="long_context",
	        prompt=(
	            "class Calculator:\n"
	            "    def __init__(self):\n"
	            "        self.result = 0\n"
	            "\n"
	            "    def add(self, value):\n"
	            "        self.result +="
	        ),
	        expected_patterns=[r"value"],  # Should use parameter
	        max_tokens=20,
	        description="Should reference method parameter",
	    ),
	    TestCase(
	        name="variable_reuse",
	        category="long_context",
	        prompt=(
	            'data = load_file("input.txt")\n'
	            "processed = clean_data(data)\n"
	            "result = analyze("
	        ),
	        # Unescaped "|" is alternation; "\|" would demand a literal pipe.
	        expected_patterns=[r"processed|data"],  # Should use defined variable
	        max_tokens=20,
	        description="Should reuse previously defined variable",
	    ),
	]

	# =============================================================================
	# CATEGORY 5: PYTHON IDIOMS
	# Tests knowledge of Python idioms
	# =============================================================================

	# Alternatives inside a pattern use an unescaped "|"; an escaped "\|" would
	# only match a literal pipe character and the check could never pass.
	PYTHON_IDIOM_TESTS = [
	    TestCase(
	        name="with_statement",
	        category="python_idioms",
	        prompt='with open("file.txt", "r") as',
	        expected_patterns=[r"f:|file:|handle:"],  # any usual handle name, then ':'
	        max_tokens=20,
	        description="Should complete with statement",
	    ),
	    TestCase(
	        name="f_string",
	        category="python_idioms",
	        prompt='name = "World"\ngreeting = f"Hello, {',
	        expected_patterns=[r"name"],
	        max_tokens=20,
	        description="Should use variable in f-string",
	    ),
	    TestCase(
	        name="lambda",
	        category="python_idioms",
	        prompt="double = lambda x:",
	        # Accepts "x * 2" or "2 * x" with optional whitespace around the "*".
	        expected_patterns=[r"x\s*\*\s*2|2\s*\*\s*x"],
	        max_tokens=20,
	        description="Should complete lambda correctly",
	    ),
	    TestCase(
	        name="enumerate",
	        category="python_idioms",
	        prompt="for i, item in enumerate(",
	        expected_patterns=[r"[a-z_]+\)"],  # iterable followed by )
	        max_tokens=20,
	        description="Should complete enumerate",
	    ),
	]


	def get_all_test_cases() -> List[TestCase]:
	    """Collect the test cases of every category into one new list.

	    Returns:
	        A freshly built list holding the bracket, indentation, structure,
	        long-context and Python-idiom cases, in that order.
	    """
	    return [
	        *BRACKET_TESTS,
	        *INDENTATION_TESTS,
	        *STRUCTURE_TESTS,
	        *LONG_CONTEXT_TESTS,
	        *PYTHON_IDIOM_TESTS,
	    ]


	def get_tests_by_category(category: str) -> List[TestCase]:
	    """Select the test cases whose ``category`` equals the given name.

	    Args:
	        category: Category name to match exactly.

	    Returns:
	        The matching cases in definition order; empty if none match.
	    """
	    return [
	        case
	        for case in get_all_test_cases()
	        if case.category == category
	    ]


	def get_categories() -> List[str]:
	    """Name the test categories, in the order they are listed.

	    Returns:
	        A new list of category names on every call.
	    """
	    category_names = (
	        "brackets",
	        "indentation",
	        "structure",
	        "long_context",
	        "python_idioms",
	    )
	    return list(category_names)


	if __name__ == '__main__':
	    # Run as a script: print a catalogue of the tests, grouped by category,
	    # followed by the overall count.
	    banner_rule = "=" * 60
	    print("📋 Available Test Cases:")
	    print(banner_rule)

	    for category in get_categories():
	        tests = get_tests_by_category(category)
	        heading = f"\n[{category.upper()}] ({len(tests)} tests)"
	        print(heading)
	        for test in tests:
	            print(f" • {test.name}: {test.description}")

	    total = len(get_all_test_cases())
	    print(f"\n📊 Total: {total} tests")