# Tavernari's picture
# Upload folder using huggingface_hub
# 148b631 verified
"""
test_cases.py - Test cases for code completion validation.
Defines specific tests to evaluate if RippleGPT understands
hierarchical code structures.
"""
from dataclasses import dataclass
from typing import List, Callable, Optional
import re
@dataclass
class TestCase:
    """Represents a code completion test case.

    Attributes:
        name: Short identifier of the test.
        category: Name of the group the test belongs to (e.g. "brackets").
        prompt: Code prefix that is to be completed.
        expected_patterns: Regex patterns that MUST appear in the output.
        forbidden_patterns: Regex patterns that MUST NOT appear in the
            output. ``None`` (the default) is normalized to an empty list.
        max_tokens: Token limit associated with the test (defaults to 50);
            presumably the generation budget -- confirm against the runner.
        description: Human-readable summary of what the test checks.
    """

    # The class name starts with "Test", so pytest would otherwise try to
    # collect it as a test class. The attribute carries no annotation, so the
    # dataclass machinery does not turn it into a field.
    __test__ = False

    name: str
    category: str
    prompt: str
    expected_patterns: List[str]  # Regex patterns that MUST appear in output
    forbidden_patterns: Optional[List[str]] = None  # Patterns that MUST NOT appear
    max_tokens: int = 50
    description: str = ""

    def __post_init__(self):
        # The default is None rather than a list literal so that instances
        # never share one mutable list; each one gets its own empty list.
        if self.forbidden_patterns is None:
            self.forbidden_patterns = []
# =============================================================================
# CATEGORY 1: BRACKET CLOSING
# Tests if the model can close parentheses, braces, and brackets
# =============================================================================
# Bracket-closing cases. They all share the same category and token limit,
# so only the per-test fields are tabulated:
# (name, prompt, expected patterns, description).
BRACKET_TESTS = [
    TestCase(
        name=test_name,
        category="brackets",
        prompt=prompt_text,
        expected_patterns=patterns,
        max_tokens=20,
        description=summary,
    )
    for test_name, prompt_text, patterns, summary in (
        (
            "simple_parenthesis",
            "def hello(name",
            [r"\)"],  # Should close parenthesis
            "Should close simple function parenthesis",
        ),
        (
            "multiple_args",
            "def calculate(a, b, c",
            [r"\)", r":"],  # Should close and add ':'
            "Should close parenthesis with multiple arguments",
        ),
        (
            "nested_parenthesis",
            "result = sum(range(10",
            [r"\)\)"],  # Should close both
            "Should close nested parentheses",
        ),
        (
            "list_bracket",
            "items = [1, 2, 3",
            [r"\]"],
            "Should close list bracket",
        ),
        (
            "dict_brace",
            'data = {"name": "test"',
            [r"\}"],
            "Should close dictionary brace",
        ),
        (
            "function_call_chain",
            "text.strip().lower(",
            [r"\)"],
            "Should close parenthesis in method chain",
        ),
    )
]
# =============================================================================
# CATEGORY 2: PYTHON INDENTATION
# Tests if the model maintains correct indentation after blocks
# =============================================================================
# Indentation cases. Widths are spelled with explicit quantifiers ({4}, {8})
# so the expected amount of indentation is unambiguous and cannot be altered
# by whitespace normalization of this file.
INDENTATION_TESTS = [
    TestCase(
        name="if_indent",
        category="indentation",
        prompt="if x > 0:\n",
        # Should indent 4 spaces or tab. The two options form ONE alternation:
        # as separate entries both would be required, and an output indented
        # one way would fail the other pattern.
        expected_patterns=[r"^(?: {4}|\t)\S"],
        max_tokens=30,
        description="Should indent after if statement",
    ),
    TestCase(
        name="for_indent",
        category="indentation",
        prompt="for i in range(10):\n",
        expected_patterns=[r" {4}\S"],
        max_tokens=30,
        description="Should indent after for loop",
    ),
    TestCase(
        name="def_indent",
        category="indentation",
        prompt="def process(data):\n",
        expected_patterns=[r" {4}"],
        max_tokens=30,
        description="Should indent function body",
    ),
    TestCase(
        name="class_indent",
        category="indentation",
        prompt="class MyClass:\n",
        expected_patterns=[r" {4}"],
        max_tokens=30,
        description="Should indent class body",
    ),
    TestCase(
        name="nested_indent",
        category="indentation",
        # The prompt itself uses 4-space indentation so that the expected
        # continuation is the 8-space (double) level.
        prompt="def foo():\n    if True:\n",
        expected_patterns=[r" {8}\S"],  # 8 spaces (double indentation)
        max_tokens=30,
        description="Should maintain nested indentation",
    ),
    TestCase(
        name="try_except_indent",
        category="indentation",
        prompt="try:\n    x = 1\nexcept:\n",
        expected_patterns=[r" {4}"],
        max_tokens=30,
        description="Should indent except block",
    ),
]
# =============================================================================
# CATEGORY 3: CODE STRUCTURE
# Tests if the model understands common code patterns
# =============================================================================
# Code-structure cases. Prompts that contain an indented body use 4-space
# indentation, matching the width the indentation tests expect.
STRUCTURE_TESTS = [
    TestCase(
        name="return_statement",
        category="structure",
        prompt="def add(a, b):\n    return a",
        # A single pattern covers every spacing of "+ b". The former extra
        # entry r"a \+ b" matched a strict subset of it, so it was either
        # redundant or (if all patterns are required) rejected valid output
        # such as "a+b" or a completion that does not repeat the prompt.
        expected_patterns=[r"\+\s*b"],
        max_tokens=20,
        description="Should complete addition operation",
    ),
    TestCase(
        name="for_loop_pattern",
        category="structure",
        prompt="for i in range(",
        expected_patterns=[r"\d+\)"],  # Number followed by )
        max_tokens=20,
        description="Should complete range() with number",
    ),
    TestCase(
        name="import_statement",
        category="structure",
        prompt="import os\nimport sys\nimport ",
        expected_patterns=[r"[a-z]+"],  # Module name
        forbidden_patterns=[r"^\d"],  # Must not start with digit
        max_tokens=20,
        description="Should suggest valid module name",
    ),
    TestCase(
        name="list_comprehension",
        category="structure",
        prompt="squares = [x**2 for x in ",
        expected_patterns=[r"range\(|list\(|\["],
        max_tokens=30,
        description="Should complete list comprehension",
    ),
    TestCase(
        name="method_definition",
        category="structure",
        prompt="class Dog:\n    def __init__(self",
        expected_patterns=[r"\)", r":"],
        max_tokens=30,
        description="Should complete __init__ definition",
    ),
    TestCase(
        name="conditional_else",
        category="structure",
        prompt="if condition:\n    do_something()\nelse",
        expected_patterns=[r":"],
        max_tokens=20,
        description="Should add ':' after else",
    ),
]
# =============================================================================
# CATEGORY 4: LONG CONTEXT
# Tests if the model maintains coherence in longer code
# =============================================================================
# Long-context cases. Prompts are written as explicit "\n"-terminated string
# pieces so that the indentation inside them is visible and is valid Python;
# each prompt ends mid-statement, without a trailing newline.
LONG_CONTEXT_TESTS = [
    TestCase(
        name="function_body",
        category="long_context",
        prompt=(
            "def calculate_average(numbers):\n"
            "    if not numbers:\n"
            "        return 0\n"
            "    total = 0\n"
            "    for num in numbers:\n"
            "        total +="
        ),
        expected_patterns=[r"num"],  # Should use loop variable
        max_tokens=20,
        description="Should recall loop variable",
    ),
    TestCase(
        name="class_method_reference",
        category="long_context",
        prompt=(
            "class Calculator:\n"
            "    def __init__(self):\n"
            "        self.result = 0\n"
            "    def add(self, value):\n"
            "        self.result +="
        ),
        expected_patterns=[r"value"],  # Should use parameter
        max_tokens=20,
        description="Should reference method parameter",
    ),
    TestCase(
        name="variable_reuse",
        category="long_context",
        prompt=(
            'data = load_file("input.txt")\n'
            "processed = clean_data(data)\n"
            "result = analyze("
        ),
        expected_patterns=[r"processed|data"],  # Should use defined variable
        max_tokens=20,
        description="Should reuse previously defined variable",
    ),
]
# =============================================================================
# CATEGORY 5: PYTHON IDIOMS
# Tests knowledge of Python idioms
# =============================================================================
# Python-idiom cases. The leading positional arguments are, in order:
# name, category, prompt, expected_patterns.
PYTHON_IDIOM_TESTS = [
    TestCase(
        "with_statement",
        "python_idioms",
        'with open("file.txt", "r") as',
        [r"f:|file:|handle:"],
        description="Should complete with statement",
        max_tokens=20,
    ),
    TestCase(
        "f_string",
        "python_idioms",
        'name = "World"\ngreeting = f"Hello, {',
        [r"name"],
        description="Should use variable in f-string",
        max_tokens=20,
    ),
    TestCase(
        "lambda",
        "python_idioms",
        "double = lambda x:",
        [r"x\s*\*\s*2|2\s*\*\s*x"],
        description="Should complete lambda correctly",
        max_tokens=20,
    ),
    TestCase(
        "enumerate",
        "python_idioms",
        "for i, item in enumerate(",
        [r"[a-z_]+\)"],  # iterable followed by )
        description="Should complete enumerate",
        max_tokens=20,
    ),
]
def get_all_test_cases() -> List[TestCase]:
    """Returns every test case from all category lists, as a new list.

    The order is: brackets, indentation, structure, long context, idioms.
    """
    return [
        *BRACKET_TESTS,
        *INDENTATION_TESTS,
        *STRUCTURE_TESTS,
        *LONG_CONTEXT_TESTS,
        *PYTHON_IDIOM_TESTS,
    ]
def get_tests_by_category(category: str) -> List[TestCase]:
    """Returns the test cases whose ``category`` equals the given name.

    An unknown category yields an empty list.
    """
    return [case for case in get_all_test_cases() if case.category == category]
def get_categories() -> List[str]:
    """Returns list of available categories.

    The names are collected from the registered test cases, in order of
    first appearance, so the result cannot drift from the category strings
    actually used by the tests.
    """
    # dict.fromkeys drops duplicates while keeping first-seen order.
    return list(dict.fromkeys(case.category for case in get_all_test_cases()))
if __name__ == "__main__":
    # Print a per-category listing of the registered tests, then the total.
    print("📋 Available Test Cases:")
    print("=" * 60)

    for category_name in get_categories():
        category_tests = get_tests_by_category(category_name)
        heading = f"\n[{category_name.upper()}] ({len(category_tests)} tests)"
        print(heading)
        for case in category_tests:
            print(f" • {case.name}: {case.description}")

    total = len(get_all_test_cases())
    print(f"\n📊 Total: {total} tests")