""" test_cases.py - Test cases for code completion validation. Defines specific tests to evaluate if RippleGPT understands hierarchical code structures. """ from dataclasses import dataclass from typing import List, Callable, Optional import re @dataclass class TestCase: """Represents a code completion test case.""" name: str category: str prompt: str expected_patterns: List[str] # Regex patterns that MUST appear in output forbidden_patterns: List[str] = None # Patterns that MUST NOT appear max_tokens: int = 50 description: str = "" def __post_init__(self): if self.forbidden_patterns is None: self.forbidden_patterns = [] # ============================================================================= # CATEGORY 1: BRACKET CLOSING # Tests if the model can close parentheses, braces, and brackets # ============================================================================= BRACKET_TESTS = [ TestCase( name="simple_parenthesis", category="brackets", prompt="def hello(name", expected_patterns=[r"\)"], # Should close parenthesis max_tokens=20, description="Should close simple function parenthesis" ), TestCase( name="multiple_args", category="brackets", prompt="def calculate(a, b, c", expected_patterns=[r"\)", r":"], # Should close and add ':' max_tokens=20, description="Should close parenthesis with multiple arguments" ), TestCase( name="nested_parenthesis", category="brackets", prompt="result = sum(range(10", expected_patterns=[r"\)\)"], # Should close both max_tokens=20, description="Should close nested parentheses" ), TestCase( name="list_bracket", category="brackets", prompt="items = [1, 2, 3", expected_patterns=[r"\]"], max_tokens=20, description="Should close list bracket" ), TestCase( name="dict_brace", category="brackets", prompt='data = {"name": "test"', expected_patterns=[r"\}"], max_tokens=20, description="Should close dictionary brace" ), TestCase( name="function_call_chain", category="brackets", prompt="text.strip().lower(", expected_patterns=[r"\)"], max_tokens=20, description="Should close parenthesis in method chain" ), ] # ============================================================================= # CATEGORY 2: PYTHON INDENTATION # Tests if the model maintains correct indentation after blocks # ============================================================================= INDENTATION_TESTS = [ TestCase( name="if_indent", category="indentation", prompt="if x > 0:\n", expected_patterns=[r"^ \S", r"^\t\S"], # Should indent 4 spaces or tab max_tokens=30, description="Should indent after if statement" ), TestCase( name="for_indent", category="indentation", prompt="for i in range(10):\n", expected_patterns=[r" \S"], max_tokens=30, description="Should indent after for loop" ), TestCase( name="def_indent", category="indentation", prompt="def process(data):\n", expected_patterns=[r" "], max_tokens=30, description="Should indent function body" ), TestCase( name="class_indent", category="indentation", prompt="class MyClass:\n", expected_patterns=[r" "], max_tokens=30, description="Should indent class body" ), TestCase( name="nested_indent", category="indentation", prompt="def foo():\n if True:\n", expected_patterns=[r" \S"], # 8 spaces (double indentation) max_tokens=30, description="Should maintain nested indentation" ), TestCase( name="try_except_indent", category="indentation", prompt="try:\n x = 1\nexcept:\n", expected_patterns=[r" "], max_tokens=30, description="Should indent except block" ), ] # ============================================================================= # CATEGORY 3: CODE STRUCTURE # Tests if the model understands common code patterns # ============================================================================= STRUCTURE_TESTS = [ TestCase( name="return_statement", category="structure", prompt="def add(a, b):\n return a", expected_patterns=[r"\+\s*b", r"a \+ b"], max_tokens=20, description="Should complete addition operation" ), TestCase( name="for_loop_pattern", category="structure", prompt="for i in range(", expected_patterns=[r"\d+\)"], # Number followed by ) max_tokens=20, description="Should complete range() with number" ), TestCase( name="import_statement", category="structure", prompt="import os\nimport sys\nimport ", expected_patterns=[r"[a-z]+"], # Module name forbidden_patterns=[r"^\d"], # Must not start with digit max_tokens=20, description="Should suggest valid module name" ), TestCase( name="list_comprehension", category="structure", prompt="squares = [x**2 for x in ", expected_patterns=[r"range\(|list\(|\["], max_tokens=30, description="Should complete list comprehension" ), TestCase( name="method_definition", category="structure", prompt="class Dog:\n def __init__(self", expected_patterns=[r"\)", r":"], max_tokens=30, description="Should complete __init__ definition" ), TestCase( name="conditional_else", category="structure", prompt="if condition:\n do_something()\nelse", expected_patterns=[r":"], max_tokens=20, description="Should add ':' after else" ), ] # ============================================================================= # CATEGORY 4: LONG CONTEXT # Tests if the model maintains coherence in longer code # ============================================================================= LONG_CONTEXT_TESTS = [ TestCase( name="function_body", category="long_context", prompt="""def calculate_average(numbers): if not numbers: return 0 total = 0 for num in numbers: total +=""" , expected_patterns=[r"num"], # Should use loop variable max_tokens=20, description="Should recall loop variable" ), TestCase( name="class_method_reference", category="long_context", prompt="""class Calculator: def __init__(self): self.result = 0 def add(self, value): self.result +=""" , expected_patterns=[r"value"], # Should use parameter max_tokens=20, description="Should reference method parameter" ), TestCase( name="variable_reuse", category="long_context", prompt="""data = load_file("input.txt") processed = clean_data(data) result = analyze(""" , expected_patterns=[r"processed|data"], # Should use defined variable max_tokens=20, description="Should reuse previously defined variable" ), ] # ============================================================================= # CATEGORY 5: PYTHON IDIOMS # Tests knowledge of Python idioms # ============================================================================= PYTHON_IDIOM_TESTS = [ TestCase( name="with_statement", category="python_idioms", prompt='with open("file.txt", "r") as', expected_patterns=[r"f:|file:|handle:"], max_tokens=20, description="Should complete with statement" ), TestCase( name="f_string", category="python_idioms", prompt='name = "World"\ngreeting = f"Hello, {', expected_patterns=[r"name"], max_tokens=20, description="Should use variable in f-string" ), TestCase( name="lambda", category="python_idioms", prompt="double = lambda x:", expected_patterns=[r"x\s*\*\s*2|2\s*\*\s*x"], max_tokens=20, description="Should complete lambda correctly" ), TestCase( name="enumerate", category="python_idioms", prompt="for i, item in enumerate(", expected_patterns=[r"[a-z_]+\)"], # iterable followed by ) max_tokens=20, description="Should complete enumerate" ), ] def get_all_test_cases() -> List[TestCase]: """Returns all test cases.""" return ( BRACKET_TESTS + INDENTATION_TESTS + STRUCTURE_TESTS + LONG_CONTEXT_TESTS + PYTHON_IDIOM_TESTS ) def get_tests_by_category(category: str) -> List[TestCase]: """Returns tests for a specific category.""" all_tests = get_all_test_cases() return [t for t in all_tests if t.category == category] def get_categories() -> List[str]: """Returns list of available categories.""" return [ "brackets", "indentation", "structure", "long_context", "python_idioms" ] if __name__ == '__main__': # List all available tests print("šŸ“‹ Available Test Cases:") print("=" * 60) for category in get_categories(): tests = get_tests_by_category(category) print(f"\n[{category.upper()}] ({len(tests)} tests)") for test in tests: print(f" • {test.name}: {test.description}") print(f"\nšŸ“Š Total: {len(get_all_test_cases())} tests")