# File size: 9,857 Bytes
#
# 148b631

"""
test_cases.py - Test cases for code completion validation.

Defines specific tests to evaluate if RippleGPT understands
hierarchical code structures.
"""

from dataclasses import dataclass
from typing import List, Callable, Optional
import re


@dataclass
class TestCase:
    """Represents a code completion test case.

    Attributes:
        name: Identifier of the test case.
        category: Name of the group the test case belongs to.
        prompt: Prompt text of the test case (the code fragment to complete).
        expected_patterns: Regex patterns that MUST appear in output.
        forbidden_patterns: Regex patterns that MUST NOT appear in output.
            ``None`` (the default) is normalized to an empty list.
        max_tokens: Token limit associated with the test case (presumably
            the generation budget; the consumer is not part of this class).
        description: Human-readable summary of what the test checks.
    """

    # The class name starts with "Test", so pytest would otherwise try to
    # collect it as a test class and emit a collection warning. This is an
    # un-annotated class attribute, so it does not become a dataclass field.
    __test__ = False

    name: str
    category: str
    prompt: str
    expected_patterns: List[str]  # Regex patterns that MUST appear in output
    forbidden_patterns: Optional[List[str]] = None  # Patterns that MUST NOT appear
    max_tokens: int = 50
    description: str = ""

    def __post_init__(self) -> None:
        """Replace a missing ``forbidden_patterns`` with a fresh empty list."""
        # A per-instance list avoids sharing one mutable default between cases.
        if self.forbidden_patterns is None:
            self.forbidden_patterns = []


# =============================================================================
# CATEGORY 1: BRACKET CLOSING
# Tests if the model can close parentheses, braces, and brackets
# =============================================================================

# Every bracket case shares the same category and max_tokens, so only the
# varying fields are tabulated: (name, prompt, expected patterns, description).
BRACKET_TESTS = [
    TestCase(
        name=case_name,
        category="brackets",
        prompt=case_prompt,
        expected_patterns=case_patterns,
        max_tokens=20,
        description=case_description,
    )
    for case_name, case_prompt, case_patterns, case_description in (
        (
            "simple_parenthesis",
            "def hello(name",
            [r"\)"],  # closing parenthesis
            "Should close simple function parenthesis",
        ),
        (
            "multiple_args",
            "def calculate(a, b, c",
            [r"\)", r":"],  # closing parenthesis and the ':' of the def
            "Should close parenthesis with multiple arguments",
        ),
        (
            "nested_parenthesis",
            "result = sum(range(10",
            [r"\)\)"],  # both open calls closed back to back
            "Should close nested parentheses",
        ),
        (
            "list_bracket",
            "items = [1, 2, 3",
            [r"\]"],
            "Should close list bracket",
        ),
        (
            "dict_brace",
            'data = {"name": "test"',
            [r"\}"],
            "Should close dictionary brace",
        ),
        (
            "function_call_chain",
            "text.strip().lower(",
            [r"\)"],
            "Should close parenthesis in method chain",
        ),
    )
]

# =============================================================================
# CATEGORY 2: PYTHON INDENTATION
# Tests if the model maintains correct indentation after blocks
# =============================================================================

# Cases that check the indentation produced after a block-opening line.
INDENTATION_TESTS = [
    TestCase(
        name="if_indent",
        category="indentation",
        prompt="if x > 0:\n",
        # A single pattern with an alternation: the output may start with
        # four spaces OR a tab. Listing the two alternatives as separate
        # entries would require both to match (expected patterns MUST all
        # appear), which no output can satisfy at the same anchor position.
        expected_patterns=[r"^(?:    |\t)\S"],
        max_tokens=30,
        description="Should indent after if statement",
    ),
    TestCase(
        name="for_indent",
        category="indentation",
        prompt="for i in range(10):\n",
        expected_patterns=[r"    \S"],  # 4 spaces followed by code
        max_tokens=30,
        description="Should indent after for loop",
    ),
    TestCase(
        name="def_indent",
        category="indentation",
        prompt="def process(data):\n",
        expected_patterns=[r"    "],
        max_tokens=30,
        description="Should indent function body",
    ),
    TestCase(
        name="class_indent",
        category="indentation",
        prompt="class MyClass:\n",
        expected_patterns=[r"    "],
        max_tokens=30,
        description="Should indent class body",
    ),
    TestCase(
        name="nested_indent",
        category="indentation",
        prompt="def foo():\n    if True:\n",
        expected_patterns=[r"        \S"],  # 8 spaces (double indentation)
        max_tokens=30,
        description="Should maintain nested indentation",
    ),
    TestCase(
        name="try_except_indent",
        category="indentation",
        prompt="try:\n    x = 1\nexcept:\n",
        expected_patterns=[r"    "],
        max_tokens=30,
        description="Should indent except block",
    ),
]

# =============================================================================
# CATEGORY 3: CODE STRUCTURE
# Tests if the model understands common code patterns
# =============================================================================

# Cases that check completion of common statement-level code patterns.
STRUCTURE_TESTS = [
    TestCase(
        name="return_statement",
        category="structure",
        prompt="def add(a, b):\n    return a",
        # The prompt already ends with "a", so the completion is expected to
        # look like " + b". Additionally requiring r"a \+ b" would demand
        # that the output repeat the operand; any text matching that pattern
        # also matches this one, so the single pattern is sufficient.
        expected_patterns=[r"\+\s*b"],
        max_tokens=20,
        description="Should complete addition operation",
    ),
    TestCase(
        name="for_loop_pattern",
        category="structure",
        prompt="for i in range(",
        expected_patterns=[r"\d+\)"],  # Number followed by )
        max_tokens=20,
        description="Should complete range() with number",
    ),
    TestCase(
        name="import_statement",
        category="structure",
        prompt="import os\nimport sys\nimport ",
        expected_patterns=[r"[a-z]+"],  # Module name
        forbidden_patterns=[r"^\d"],  # Must not start with digit
        max_tokens=20,
        description="Should suggest valid module name",
    ),
    TestCase(
        name="list_comprehension",
        category="structure",
        prompt="squares = [x**2 for x in ",
        expected_patterns=[r"range\(|list\(|\["],  # any one of these iterables
        max_tokens=30,
        description="Should complete list comprehension",
    ),
    TestCase(
        name="method_definition",
        category="structure",
        prompt="class Dog:\n    def __init__(self",
        expected_patterns=[r"\)", r":"],  # close the signature and add ':'
        max_tokens=30,
        description="Should complete __init__ definition",
    ),
    TestCase(
        name="conditional_else",
        category="structure",
        prompt="if condition:\n    do_something()\nelse",
        expected_patterns=[r":"],
        max_tokens=20,
        description="Should add ':' after else",
    ),
]

# =============================================================================
# CATEGORY 4: LONG CONTEXT
# Tests if the model maintains coherence in longer code
# =============================================================================

# Multi-line prompts are spelled as adjacent string literals, one per source
# line, so that every newline and whitespace-only line is explicit.
LONG_CONTEXT_TESTS = [
    TestCase(
        name="function_body",
        category="long_context",
        description="Should recall loop variable",
        max_tokens=20,
        prompt=(
            "def calculate_average(numbers):\n"
            "    if not numbers:\n"
            "        return 0\n"
            "    total = 0\n"
            "    for num in numbers:\n"
            "        total +="
        ),
        expected_patterns=[r"num"],  # the loop variable
    ),
    TestCase(
        name="class_method_reference",
        category="long_context",
        description="Should reference method parameter",
        max_tokens=20,
        prompt=(
            "class Calculator:\n"
            "    def __init__(self):\n"
            "        self.result = 0\n"
            "    \n"
            "    def add(self, value):\n"
            "        self.result +="
        ),
        expected_patterns=[r"value"],  # the method's parameter
    ),
    TestCase(
        name="variable_reuse",
        category="long_context",
        description="Should reuse previously defined variable",
        max_tokens=20,
        prompt=(
            'data = load_file("input.txt")\n'
            "processed = clean_data(data)\n"
            "result = analyze("
        ),
        expected_patterns=[r"processed|data"],  # a variable defined above
    ),
]

# =============================================================================
# CATEGORY 5: PYTHON IDIOMS
# Tests knowledge of Python idioms
# =============================================================================

# All idiom cases share the same category and max_tokens; the rows below hold
# the varying fields: (name, prompt, expected patterns, description).
PYTHON_IDIOM_TESTS = [
    TestCase(
        name=case_name,
        category="python_idioms",
        prompt=case_prompt,
        expected_patterns=case_patterns,
        max_tokens=20,
        description=case_description,
    )
    for case_name, case_prompt, case_patterns, case_description in (
        (
            "with_statement",
            'with open("file.txt", "r") as',
            [r"f:|file:|handle:"],
            "Should complete with statement",
        ),
        (
            "f_string",
            'name = "World"\ngreeting = f"Hello, {',
            [r"name"],
            "Should use variable in f-string",
        ),
        (
            "lambda",
            "double = lambda x:",
            [r"x\s*\*\s*2|2\s*\*\s*x"],
            "Should complete lambda correctly",
        ),
        (
            "enumerate",
            "for i, item in enumerate(",
            [r"[a-z_]+\)"],  # iterable name followed by )
            "Should complete enumerate",
        ),
    )
]


def get_all_test_cases() -> List[TestCase]:
    """Gather the test cases of every category into one new list.

    Returns:
        The bracket, indentation, structure, long-context and Python-idiom
        cases, concatenated in that order.
    """
    collected: List[TestCase] = []
    for group in (
        BRACKET_TESTS,
        INDENTATION_TESTS,
        STRUCTURE_TESTS,
        LONG_CONTEXT_TESTS,
        PYTHON_IDIOM_TESTS,
    ):
        collected.extend(group)
    return collected


def get_tests_by_category(category: str) -> List[TestCase]:
    """Select the test cases whose ``category`` equals the given name.

    Args:
        category: Category name to match exactly.

    Returns:
        The matching cases, in the order ``get_all_test_cases()`` yields
        them; an empty list when nothing matches.
    """

    def _in_category(case: TestCase) -> bool:
        return case.category == category

    return list(filter(_in_category, get_all_test_cases()))


def get_categories() -> List[str]:
    """List the names of the available test categories.

    Returns:
        A new list on every call, so callers may modify it freely.
    """
    category_names = (
        "brackets",
        "indentation",
        "structure",
        "long_context",
        "python_idioms",
    )
    return list(category_names)


if __name__ == "__main__":
    # Script mode: print a catalogue of the tests, grouped by category.
    divider = "=" * 60
    print("📋 Available Test Cases:")
    print(divider)

    # Pair each category with its tests up front, then report group by group.
    grouped = [(name, get_tests_by_category(name)) for name in get_categories()]
    for category, tests in grouped:
        print(f"\n[{category.upper()}] ({len(tests)} tests)")
        for test in tests:
            print(f"  • {test.name}: {test.description}")

    # Grand total across all categories.
    print(f"\n📊 Total: {len(get_all_test_cases())} tests")