from typing import Any, Dict, List, Optional, Tuple, Set, Union
import uuid
import json
import re
import random
import ast
import copy
from pathlib import Path
import tempfile
import subprocess
import shutil
import os

from recursive_swe_bench.core.recursive_task import (
    RecursiveTask, ProblemState, EvaluationResult, Feedback, TaskStatus
)

class BugCategory:
    """Categories of bugs for classification and evolution."""
    SYNTAX = "syntax"
    LOGICAL = "logical"
    PERFORMANCE = "performance"
    SECURITY = "security"
    CONCURRENCY = "concurrency"
    EXCEPTION_HANDLING = "exception_handling"
    API_USAGE = "api_usage"
    MEMORY_MANAGEMENT = "memory_management"
    TYPE_ERROR = "type_error"
    EDGE_CASE = "edge_case"
    DATA_HANDLING = "data_handling"
    DEPENDENCY = "dependency"
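
# Illustrative usage (a sketch, not part of the original module): the category
# constants above are meant to be passed through the task config, e.g.
#
#     config = {"bug_categories": [BugCategory.LOGICAL, BugCategory.EDGE_CASE]}
#
# Categories omitted from the config are never selected when the task evolves
# and injects new bugs.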

class BugFixingTask(RecursiveTask):
    """
    A recursive task for evaluating how models fix bugs in code.

    The task presents a piece of code with one or more bugs, and evolves
    based on the model's fix attempts. As the model addresses issues,
    the task may introduce more subtle bugs, change requirements, or
    increase complexity to test adaptive problem-solving.
    """

    def __init__(
        self,
        initial_state: ProblemState,
        config: Optional[Dict[str, Any]] = None,
        test_runner: Any = None
    ):
        """
        Initialize the bug fixing task.

        Args:
            initial_state: The initial problem state
            config: Configuration options
            test_runner: Custom test runner (optional)
        """
        super().__init__(initial_state, config)
        self.test_runner = test_runner or DefaultTestRunner()
        self.bug_categories: Set[str] = set(
            self.config.get("bug_categories", [BugCategory.LOGICAL, BugCategory.SYNTAX])
        )
        self.difficulty_progression = self.config.get(
            "difficulty_progression", [0.0, 0.15, 0.3, 0.5, 0.7]
        )
        self.evolution_strategies = self.config.get(
            "evolution_strategies", ["add_subtle_bug", "change_requirements", "increase_complexity"]
        )
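
    # Example construction (hedged sketch; the ProblemState fields shown are
    # assumptions inferred from how they are read below, not a confirmed
    # constructor signature):
    #
    #     state = ProblemState(
    #         description="Fix the off-by-one error in merge()",
    #         code_context={"code": buggy_source, "tests": []},
    #         requirements=[],
    #         difficulty=0.0,
    #         evolution_stage=0,
    #     )
    #     task = BugFixingTask(state, config={"bug_categories": [BugCategory.LOGICAL]})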

    def _run_evaluation(self, solution: str) -> EvaluationResult:
        """
        Run tests to evaluate the solution.

        Args:
            solution: The solution code

        Returns:
            Evaluation results
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)

            # Write the candidate solution to a temporary module.
            solution_file = temp_path / "solution.py"
            with open(solution_file, "w") as f:
                f.write(solution)

            # Materialize the task's tests next to it.
            test_files = self._create_test_files(temp_path)

            # Execute the tests against the solution.
            results = self.test_runner.run_tests(
                solution_file=solution_file,
                test_files=test_files,
                code_context=self.state.code_context
            )

            # Convert raw results into a normalized score.
            score = self._calculate_score(results)

            return EvaluationResult(
                success=results["all_passed"],
                score=score,
                execution_results=results["execution"],
                error_details=results.get("errors"),
                test_results=results["tests"],
                metrics={
                    "passed_tests": results["passed_tests"],
                    "total_tests": results["total_tests"],
                    "execution_time": results["execution_time"],
                    "memory_usage": results.get("memory_usage", 0),
                    "code_complexity": self._calculate_complexity(solution)
                }
            )
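
    # Contract assumed of the test runner (inferred from the keys read above;
    # not a documented interface): run_tests(...) should return a dict like
    #
    #     {
    #         "all_passed": bool,
    #         "passed_tests": int,
    #         "total_tests": int,
    #         "execution": {"success": bool, ...},
    #         "execution_time": float,
    #         "tests": {test_name: {"passed": bool, "message": str, ...}},
    #         "errors": {error_type: {"message": str, "location": Any}},  # optional
    #         "memory_usage": float,  # optional
    #     }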

    def _generate_feedback(self, solution: str, result: EvaluationResult) -> Feedback:
        """
        Generate structured feedback based on evaluation results.

        Args:
            solution: The solution code
            result: The evaluation results

        Returns:
            Structured feedback
        """
        issues = []
        suggestions = []
        focus_areas = []

        if result.test_results:
            for test_name, test_result in result.test_results.items():
                if not test_result["passed"]:
                    issues.append({
                        "type": "test_failure",
                        "test": test_name,
                        "message": test_result.get("message", "Test failed"),
                        "expected": test_result.get("expected"),
                        "actual": test_result.get("actual")
                    })

        if result.error_details:
            for error_type, error_info in result.error_details.items():
                issues.append({
                    "type": "error",
                    "error_type": error_type,
                    "message": error_info.get("message", "An error occurred"),
                    "location": error_info.get("location")
                })

        for issue in issues:
            if issue["type"] == "test_failure":
                suggestion = self._generate_suggestion_for_test_failure(
                    issue, solution, result.test_results
                )
                if suggestion:
                    suggestions.append(suggestion)
            elif issue["type"] == "error":
                suggestion = self._generate_suggestion_for_error(
                    issue, solution
                )
                if suggestion:
                    suggestions.append(suggestion)

        focus_areas = self._determine_focus_areas(issues, solution, result)

        adaptation_hints = self._generate_adaptation_hints(solution, result)

        if result.success:
            summary = (
                f"Your solution passes all tests with a score of {result.score:.2f}. "
                f"The code successfully addresses the bugs in the original implementation."
            )
        else:
            passed = result.metrics.get("passed_tests", 0)
            total = result.metrics.get("total_tests", 0)
            summary = (
                f"Your solution passes {passed}/{total} tests with a score of {result.score:.2f}. "
                f"There are still issues that need to be addressed."
            )

        return Feedback(
            summary=summary,
            issues=issues,
            suggestions=suggestions,
            focus_areas=focus_areas,
            adaptation_hints=adaptation_hints
        )

    def _evolve_state(self, solution: str, result: EvaluationResult, feedback: Feedback) -> ProblemState:
        """
        Evolve the problem state based on the solution and feedback.

        This method implements the recursive nature of the benchmark by
        adapting the problem to challenge the model's understanding.

        Args:
            solution: The attempted solution
            result: The evaluation results
            feedback: The feedback provided

        Returns:
            The evolved problem state
        """
        # Near-perfect solutions get a harder problem.
        if result.success and result.score > 0.95:
            return self._increase_difficulty(solution, result, feedback)

        # Close-but-imperfect solutions get a problem focused on what remains.
        elif result.score > 0.7:
            return self._focus_remaining_issues(solution, result, feedback)

        # Weak solutions get more guidance.
        else:
            return self._provide_more_guidance(solution, result, feedback)

    def _increase_difficulty(self, solution: str, result: EvaluationResult, feedback: Feedback) -> ProblemState:
        """
        Increase the difficulty of the problem for models that solved it well.

        Args:
            solution: The successful solution
            result: The evaluation results
            feedback: The feedback provided

        Returns:
            The evolved problem state with increased difficulty
        """
        new_state = copy.deepcopy(self.state)

        new_state.evolution_stage += 1

        current_difficulty_idx = min(new_state.evolution_stage,
                                     len(self.difficulty_progression) - 1)
        new_state.difficulty = self.difficulty_progression[current_difficulty_idx]

        strategy = self._select_evolution_strategy(solution, result, feedback)

        if strategy == "add_subtle_bug":
            self._add_subtle_bug(new_state, solution)
        elif strategy == "change_requirements":
            self._change_requirements(new_state, solution)
        elif strategy == "increase_complexity":
            self._increase_complexity(new_state, solution)

        new_state.description = self._generate_description(new_state)

        new_state.adaptation_vector = self._calculate_adaptation_vector(
            solution, result, feedback
        )

        return new_state

    def _focus_remaining_issues(self, solution: str, result: EvaluationResult, feedback: Feedback) -> ProblemState:
        """
        Evolve the state to focus on remaining issues when the solution is close but not perfect.

        Args:
            solution: The nearly-successful solution
            result: The evaluation results
            feedback: The feedback provided

        Returns:
            The evolved problem state focusing on remaining issues
        """
        new_state = copy.deepcopy(self.state)

        new_state.evolution_stage += 1

        current_difficulty_idx = min(new_state.evolution_stage - 1,
                                     len(self.difficulty_progression) - 1)
        new_state.difficulty = self.difficulty_progression[current_difficulty_idx]

        new_state.code_context["focus_areas"] = feedback.focus_areas

        if result.test_results:
            failing_tests = [
                test_name for test_name, test_result in result.test_results.items()
                if not test_result["passed"]
            ]
            new_state.code_context["failing_tests"] = failing_tests

        new_state.description = self._generate_focused_description(
            new_state, feedback.issues
        )

        new_state.adaptation_vector = self._calculate_adaptation_vector(
            solution, result, feedback
        )

        return new_state

    def _provide_more_guidance(self, solution: str, result: EvaluationResult, feedback: Feedback) -> ProblemState:
        """
        Evolve the state to provide more guidance when the solution was not very good.

        Args:
            solution: The unsuccessful solution
            result: The evaluation results
            feedback: The feedback provided

        Returns:
            The evolved problem state with more guidance
        """
        new_state = copy.deepcopy(self.state)

        new_state.evolution_stage += 1

        # Step the difficulty back one notch to give the model room to recover.
        current_difficulty_idx = max(0, min(new_state.evolution_stage - 1,
                                            len(self.difficulty_progression) - 1) - 1)
        new_state.difficulty = self.difficulty_progression[current_difficulty_idx]

        new_state.code_context["hints"] = self._generate_hints(
            solution, result, feedback
        )

        if result.test_results:
            detailed_test_results = {}
            for test_name, test_result in result.test_results.items():
                if not test_result["passed"]:
                    detailed_test_results[test_name] = {
                        "message": test_result.get("message", "Test failed"),
                        "expected": test_result.get("expected"),
                        "actual": test_result.get("actual"),
                        "hint": self._generate_test_hint(test_name, test_result)
                    }
            new_state.code_context["detailed_test_results"] = detailed_test_results

        new_state.description = self._generate_guided_description(
            new_state, feedback.issues, feedback.suggestions
        )

        new_state.adaptation_vector = self._calculate_adaptation_vector(
            solution, result, feedback
        )

        return new_state

    def _select_evolution_strategy(self, solution: str, result: EvaluationResult, feedback: Feedback) -> str:
        """
        Select an evolution strategy based on the current state and solution.

        Args:
            solution: The current solution
            result: The evaluation results
            feedback: The feedback provided

        Returns:
            The selected evolution strategy
        """
        available_strategies = self.evolution_strategies.copy()

        weights = {}

        # Near-perfect solutions usually get a new subtle bug.
        if result.score > 0.95:
            weights["add_subtle_bug"] = 0.6
            weights["change_requirements"] = 0.3
            weights["increase_complexity"] = 0.1

        # After several injected bugs, prefer shifting the requirements.
        elif self.state.evolution_stage >= 2 and "bug_count" in self.state.code_context and self.state.code_context["bug_count"] >= 3:
            weights["add_subtle_bug"] = 0.1
            weights["change_requirements"] = 0.7
            weights["increase_complexity"] = 0.2

        # Good-but-imperfect solutions mostly get more complexity.
        elif result.score > 0.85:
            weights["add_subtle_bug"] = 0.2
            weights["change_requirements"] = 0.2
            weights["increase_complexity"] = 0.6

        # Otherwise choose uniformly.
        else:
            weights = {strategy: 1.0 / len(available_strategies)
                       for strategy in available_strategies}

        # Normalize over the strategies actually available; fall back to a
        # uniform draw if none of the weighted strategies is configured.
        total_weight = sum(weights.get(strategy, 0) for strategy in available_strategies)
        if total_weight == 0:
            return random.choice(available_strategies)
        normalized_weights = [weights.get(strategy, 0) / total_weight
                              for strategy in available_strategies]

        return random.choices(available_strategies, weights=normalized_weights)[0]
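
    # random.choices draws one strategy with probability proportional to its
    # weight, so with {"add_subtle_bug": 0.6, "change_requirements": 0.3,
    # "increase_complexity": 0.1} a near-perfect solution is usually answered
    # with a new subtle bug, occasionally with changed requirements, and
    # rarely with extra complexity.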

    def _add_subtle_bug(self, state: ProblemState, solution: str) -> None:
        """
        Add a subtle bug to the solution code.

        Args:
            state: The problem state to modify
            solution: The current solution
        """
        # If the solution does not even parse, fall back to a syntax error.
        try:
            parsed_solution = ast.parse(solution)
        except SyntaxError:
            self._add_syntax_error(state, solution)
            return

        # Pick a bug category from the configured set.
        available_categories = list(self.bug_categories)
        if available_categories:
            bug_category = random.choice(available_categories)
        else:
            bug_category = BugCategory.LOGICAL

        if bug_category == BugCategory.SYNTAX:
            self._add_syntax_error(state, solution)
        elif bug_category == BugCategory.LOGICAL:
            self._add_logical_error(state, solution, parsed_solution)
        elif bug_category == BugCategory.PERFORMANCE:
            self._add_performance_issue(state, solution, parsed_solution)
        elif bug_category == BugCategory.EDGE_CASE:
            self._add_edge_case_issue(state, solution, parsed_solution)
        else:
            # Categories without a dedicated injector default to a logical error.
            self._add_logical_error(state, solution, parsed_solution)

        # Track how many bugs have been injected, and of which categories.
        if "bug_count" not in state.code_context:
            state.code_context["bug_count"] = 0
        state.code_context["bug_count"] += 1

        if "bug_categories" not in state.code_context:
            state.code_context["bug_categories"] = []
        state.code_context["bug_categories"].append(bug_category)

    def _change_requirements(self, state: ProblemState, solution: str) -> None:
        """
        Change the requirements to challenge the current solution.

        Args:
            state: The problem state to modify
            solution: The current solution
        """
        requirements = state.requirements

        new_requirement = self._generate_new_requirement(state, solution)
        if new_requirement:
            requirements.append(new_requirement)

        if requirements and random.random() < 0.5:
            idx = random.randint(0, len(requirements) - 1)
            requirements[idx] = self._modify_requirement(requirements[idx], state, solution)

    def _increase_complexity(self, state: ProblemState, solution: str) -> None:
        """
        Increase the complexity of the task.

        Args:
            state: The problem state to modify
            solution: The current solution
        """
        try:
            parsed_solution = ast.parse(solution)
        except SyntaxError:
            # If the solution does not parse, fall back to a requirement change.
            self._add_edge_case_requirement(state)
            return

        strategies = [
            "add_edge_cases",
            "increase_data_volume",
            "add_performance_constraint",
            "expand_functionality"
        ]

        strategy = random.choice(strategies)

        if strategy == "add_edge_cases":
            self._add_edge_case_requirement(state)
        elif strategy == "increase_data_volume":
            self._increase_data_volume(state, solution)
        elif strategy == "add_performance_constraint":
            self._add_performance_constraint(state, solution)
        elif strategy == "expand_functionality":
            self._expand_functionality(state, solution)

    def _create_test_files(self, temp_path: Path) -> List[Path]:
        """
        Create test files based on the current problem state.

        Args:
            temp_path: The temporary directory path

        Returns:
            List of test file paths
        """
        test_files = []

        if "tests" in self.state.code_context:
            for i, test in enumerate(self.state.code_context["tests"]):
                test_file = temp_path / f"test_{i}.py"
                with open(test_file, "w") as f:
                    f.write(test["content"])
                test_files.append(test_file)

        if not test_files:
            test_file = temp_path / "test_default.py"
            with open(test_file, "w") as f:
                f.write(self._generate_default_test())
            test_files.append(test_file)

        return test_files

    def _calculate_score(self, results: Dict[str, Any]) -> float:
        """
        Calculate a score based on test results.

        Args:
            results: The test results

        Returns:
            A score between 0 and 1
        """
        if results["total_tests"] == 0:
            test_score = 0.0
        else:
            test_score = results["passed_tests"] / results["total_tests"]

        execution_score = 1.0 if results["execution"]["success"] else 0.0

        # Weighted combination of test pass rate and clean execution.
        weights = self.config.get("score_weights", {"test": 0.7, "execution": 0.3})
        score = (test_score * weights["test"] + execution_score * weights["execution"])

        # Harder problems are graded slightly more strictly.
        difficulty_modifier = 1.0 + (self.state.difficulty * 0.2)
        score = score / difficulty_modifier

        return max(0.0, min(1.0, score))
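
    # Worked example (using the default weights): 8/10 tests passing with a
    # successful execution at difficulty 0.3 yields
    #
    #     score = (0.8 * 0.7 + 1.0 * 0.3) / (1.0 + 0.3 * 0.2)
    #           = 0.86 / 1.06 ≈ 0.81
    #
    # so the same raw result is worth slightly less as difficulty rises.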

    def _calculate_complexity(self, code: str) -> float:
        """
        Calculate the complexity of code.

        Args:
            code: The code to analyze

        Returns:
            A complexity score
        """
        complexity = 1

        # Count branching and boolean keywords as rough decision points.
        for pattern in ["if", "for", "while", "and", "or"]:
            complexity += code.count(f" {pattern} ")

        complexity += code.count("def ")

        # Normalize to [0, 1], saturating at 50 decision points.
        normalized = min(1.0, complexity / 50.0)

        return normalized
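
    # Example: code containing " if " once, " for " twice, and one "def "
    # scores 1 + 3 + 1 = 5, which normalizes to 5 / 50 = 0.1. This is a crude
    # keyword-counting proxy for cyclomatic complexity, not a real parse.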

    def _generate_suggestion_for_test_failure(
        self,
        issue: Dict[str, Any],
        solution: str,
        test_results: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Generate a suggestion for a test failure.

        Args:
            issue: The issue data
            solution: The solution code
            test_results: The test results

        Returns:
            A suggestion dictionary
        """
        test_name = issue["test"]
        test_result = test_results[test_name]

        test_content = None
        for test in self.state.code_context.get("tests", []):
            if test.get("name") == test_name:
                test_content = test.get("content")
                break

        if test_content:
            assertion_match = re.search(r"assert.*", test_content)
            assertion = assertion_match.group(0) if assertion_match else None

            test_funcs = re.findall(r"def\s+(\w+)", test_content)
            solution_funcs = re.findall(r"def\s+(\w+)", solution)

            missing_funcs = [f for f in test_funcs if f not in solution_funcs]

            if missing_funcs:
                return {
                    "type": "missing_function",
                    "message": f"Implement the missing function(s): {', '.join(missing_funcs)}",
                    "functions": missing_funcs
                }
            elif assertion:
                return {
                    "type": "fix_assertion_failure",
                    "message": f"Fix the code to pass the assertion: {assertion}",
                    "assertion": assertion,
                    "expected": test_result.get("expected"),
                    "actual": test_result.get("actual")
                }
            else:
                return {
                    "type": "fix_test_failure",
                    "message": f"Fix the code to pass the test: {test_name}",
                    "test_name": test_name
                }
        else:
            return {
                "type": "general_fix",
                "message": f"Fix the code to pass the failing test: {test_name}"
            }

    def _generate_suggestion_for_error(
        self,
        issue: Dict[str, Any],
        solution: str
    ) -> Dict[str, Any]:
        """
        Generate a suggestion for an error.

        Args:
            issue: The issue data
            solution: The solution code

        Returns:
            A suggestion dictionary
        """
        error_type = issue["error_type"]
        message = issue["message"]
        location = issue.get("location")

        if error_type == "syntax":
            return {
                "type": "fix_syntax",
                "message": f"Fix the syntax error: {message}",
                "location": location
            }
        elif error_type == "runtime":
            return {
                "type": "fix_runtime_error",
                "message": f"Fix the runtime error: {message}",
                "location": location
            }
        else:
            return {
                "type": "fix_error",
                "message": f"Fix the error: {message}",
                "error_type": error_type,
                "location": location
            }

    def _determine_focus_areas(
        self,
        issues: List[Dict[str, Any]],
        solution: str,
        result: EvaluationResult
    ) -> List[str]:
        """
        Determine focus areas based on issues and results.

        Args:
            issues: The identified issues
            solution: The solution code
            result: The evaluation results

        Returns:
            List of focus areas
        """
        focus_areas = []

        syntax_issues = [i for i in issues if i.get("error_type") == "syntax"]
        if syntax_issues:
            focus_areas.append("syntax")

        test_issues = [i for i in issues if i["type"] == "test_failure"]
        if test_issues:
            if any("expected" in i and "actual" in i for i in test_issues):
                focus_areas.append("logic")
            else:
                focus_areas.append("functionality")

        if result.metrics and "execution_time" in result.metrics:
            if result.metrics["execution_time"] > self.config.get("performance_threshold", 1.0):
                focus_areas.append("performance")

        if result.metrics and "code_complexity" in result.metrics:
            if result.metrics["code_complexity"] > self.config.get("complexity_threshold", 0.7):
                focus_areas.append("complexity")

        if not focus_areas:
            focus_areas.append("general")

        return focus_areas

    def _generate_adaptation_hints(
        self,
        solution: str,
        result: EvaluationResult
    ) -> List[Dict[str, Any]]:
        """
        Generate hints about how the problem might adapt in the next iteration.

        Args:
            solution: The solution code
            result: The evaluation results

        Returns:
            List of adaptation hints
        """
        hints = []

        if result.score > 0.8:
            hints.append({
                "type": "complexity_increase",
                "message": "The problem may become more complex in the next iteration."
            })

        if result.score > 0.9 and self.state.evolution_stage >= 1:
            hints.append({
                "type": "requirement_change",
                "message": "The requirements may change in the next iteration."
            })

        if result.score > 0.95:
            hints.append({
                "type": "new_bugs",
                "message": "New, more subtle bugs may be introduced in the next iteration."
            })

        if 0.7 < result.score < 0.95:
            focus_areas = result.metrics.get("focus_areas", [])
            if focus_areas:
                hints.append({
                    "type": "focus_shift",
                    "message": f"The next iteration may focus more on: {', '.join(focus_areas)}",
                    "areas": focus_areas
                })

        return hints

    def _generate_description(self, state: ProblemState) -> str:
        """
        Generate a description for the current problem state.

        Args:
            state: The problem state

        Returns:
            A descriptive prompt for the problem
        """
        base_desc = (
            f"Fix the bug(s) in the following code. "
            f"This is iteration {state.evolution_stage + 1} of the task."
        )

        if "bug_categories" in state.code_context:
            categories = state.code_context["bug_categories"]
            if categories:
                base_desc += f"\n\nThe code contains the following types of issues: {', '.join(categories)}."

        if state.requirements:
            base_desc += "\n\nRequirements:"
            for i, req in enumerate(state.requirements):
                base_desc += f"\n{i+1}. {req['description']}"

        difficulty_desc = "easy"
        if 0.3 < state.difficulty <= 0.6:
            difficulty_desc = "moderate"
        elif 0.6 < state.difficulty <= 0.8:
            difficulty_desc = "challenging"
        elif state.difficulty > 0.8:
            difficulty_desc = "very challenging"

        base_desc += f"\n\nThis is a {difficulty_desc} bug fixing task."

        return base_desc

    def _generate_focused_description(self, state: ProblemState, issues: List[Dict[str, Any]]) -> str:
        """
        Generate a description focused on remaining issues.

        Args:
            state: The problem state
            issues: The identified issues

        Returns:
            A descriptive prompt focused on remaining issues
        """
        base_desc = self._generate_description(state)

        if issues:
            base_desc += "\n\nFocus on the following issues:"
            for i, issue in enumerate(issues):
                if issue["type"] == "test_failure":
                    base_desc += f"\n{i+1}. Test failure in '{issue['test']}': {issue['message']}"
                else:
                    base_desc += f"\n{i+1}. {issue['error_type']} error: {issue['message']}"

        if "focus_areas" in state.code_context:
            areas = state.code_context["focus_areas"]
            if areas:
                base_desc += f"\n\nPay particular attention to: {', '.join(areas)}."

        return base_desc

    def _generate_guided_description(
        self,
        state: ProblemState,
        issues: List[Dict[str, Any]],
        suggestions: List[Dict[str, Any]]
    ) -> str:
        """
        Generate a description with added guidance.

        Args:
            state: The problem state
            issues: The identified issues
            suggestions: The suggested fixes

        Returns:
            A descriptive prompt with added guidance
        """
        base_desc = self._generate_description(state)

        if issues:
            base_desc += "\n\nThe following issues were identified in your previous solution:"
            for i, issue in enumerate(issues):
                if issue["type"] == "test_failure":
                    base_desc += f"\n{i+1}. Test failure in '{issue['test']}': {issue['message']}"
                    if "expected" in issue and "actual" in issue:
                        base_desc += f"\n   Expected: {issue['expected']}"
                        base_desc += f"\n   Actual: {issue['actual']}"
                else:
                    base_desc += f"\n{i+1}. {issue['error_type']} error: {issue['message']}"
                    if "location" in issue:
                        base_desc += f"\n   Location: {issue['location']}"

        if suggestions:
            base_desc += "\n\nConsider the following suggestions:"
            for i, suggestion in enumerate(suggestions):
                base_desc += f"\n{i+1}. {suggestion['message']}"

        if "hints" in state.code_context:
            hints = state.code_context["hints"]
            if hints:
                base_desc += "\n\nHints:"
                for i, hint in enumerate(hints):
                    base_desc += f"\n{i+1}. {hint}"

        return base_desc

    def _generate_hints(
        self,
        solution: str,
        result: EvaluationResult,
        feedback: Feedback
    ) -> List[str]:
        """
        Generate hints based on the solution and feedback.

        Args:
            solution: The solution code
            result: The evaluation results
            feedback: The feedback provided

        Returns:
            List of hints
        """
        hints = []

        if result.test_results:
            failing_tests = [
                test_name for test_name, test_result in result.test_results.items()
                if not test_result["passed"]
            ]

            if failing_tests:
                test_hint = "Focus on fixing the failing tests"

                for test_name in failing_tests[:2]:
                    test_result = result.test_results[test_name]
                    if "expected" in test_result and "actual" in test_result:
                        test_hint += f". For test '{test_name}', expected '{test_result['expected']}' but got '{test_result['actual']}'"

                hints.append(test_hint + ".")

        if result.error_details:
            for error_type, error_info in result.error_details.items():
                hints.append(f"Fix the {error_type} error: {error_info.get('message', 'Unknown error')}.")

        for area in feedback.focus_areas:
            if area == "syntax":
                hints.append("Check your syntax carefully, especially parentheses, indentation, and function definitions.")
            elif area == "logic":
                hints.append("Review the logic of your solution, especially conditional statements and loop conditions.")
            elif area == "functionality":
                hints.append("Ensure your solution implements all required functionality specified in the tests.")
            elif area == "performance":
                hints.append("Consider optimizing your solution for better performance; avoid unnecessary operations.")
            elif area == "complexity":
                hints.append("Try to simplify your solution; it may be more complex than necessary.")

        return hints

    def _generate_test_hint(self, test_name: str, test_result: Dict[str, Any]) -> str:
        """
        Generate a hint for a specific failing test.

        Args:
            test_name: The name of the test
            test_result: The test result

        Returns:
            A hint for the test
        """
        if "expected" in test_result and "actual" in test_result:
            return f"The test expected '{test_result['expected']}' but got '{test_result['actual']}'"
        elif "message" in test_result:
            return test_result["message"]
        else:
            return "The test failed, but no detailed information is available."

    def _add_syntax_error(self, state: ProblemState, solution: str) -> None:
        """
        Add a syntax error to the solution code.

        Args:
            state: The problem state to modify
            solution: The current solution
        """
        lines = solution.split('\n')

        # Choose a random non-empty, non-comment line to corrupt. Selecting
        # from a filtered list avoids looping forever when the solution
        # contains only blank lines and comments.
        candidates = [i for i, l in enumerate(lines)
                      if l.strip() and not l.strip().startswith('#')]
        if not candidates:
            return
        idx = random.choice(candidates)
        line = lines[idx]

        mod_type = random.choice([
            "remove_character",
            "add_character",
            "swap_characters",
            "change_indent"
        ])

        if mod_type == "remove_character" and line:
            char_idx = random.randint(0, len(line) - 1)
            lines[idx] = line[:char_idx] + line[char_idx+1:]

        elif mod_type == "add_character":
            char_idx = random.randint(0, len(line))
            char = random.choice(["(", ")", "{", "}", "[", "]", ":", ";", ",", "."])
            lines[idx] = line[:char_idx] + char + line[char_idx:]

        elif mod_type == "swap_characters" and len(line) >= 2:
            char_idx = random.randint(0, len(line) - 2)
            lines[idx] = (line[:char_idx] + line[char_idx+1] +
                          line[char_idx] + line[char_idx+2:])

        elif mod_type == "change_indent":
            # Break indentation by shifting the line two spaces either way.
            if line.startswith("  "):
                lines[idx] = line[2:]
            else:
                lines[idx] = "  " + line

        modified_code = '\n'.join(lines)
        state.code_context["code"] = modified_code

        if "bugs" not in state.code_context:
            state.code_context["bugs"] = []

        state.code_context["bugs"].append({
            "type": "syntax",
            "line": idx + 1,
            "description": f"Syntax error introduced in line {idx + 1}"
        })

    def _add_logical_error(self, state: ProblemState, solution: str, parsed_solution: ast.Module) -> None:
        """
        Add a logical error to the solution code.

        Args:
            state: The problem state to modify
            solution: The current solution
            parsed_solution: The parsed AST of the solution
        """
        modification_types = [
            "change_comparison",
            "invert_condition",
            "off_by_one",
            "change_operator",
            "reverse_logic"
        ]

        mod_type = random.choice(modification_types)
        lines = solution.split('\n')

        if_statements = []
        for i, line in enumerate(lines):
            if re.search(r'\bif\b|\bwhile\b|\bfor\b', line):
                if_statements.append((i, line))

        if if_statements:
            # Mutate a randomly chosen conditional or loop header.
            idx, line = random.choice(if_statements)

            if mod_type == "change_comparison":
                comparisons = {"==": "!=", "!=": "==", ">": "<", "<": ">", ">=": "<=", "<=": ">="}
                for op, new_op in comparisons.items():
                    if op in line:
                        lines[idx] = line.replace(op, new_op, 1)
                        break

            elif mod_type == "invert_condition":
                if "not" in line:
                    lines[idx] = line.replace("not ", "", 1)
                else:
                    match = re.search(r'(if|while)\s+([^:]+):', line)
                    if match:
                        condition = match.group(2)
                        lines[idx] = line.replace(condition, f"not ({condition})", 1)

            elif mod_type == "off_by_one":
                for op in ["+", "-"]:
                    if op in line:
                        match = re.search(f'\\{op}\\s*(\\d+)', line)
                        if match:
                            num = int(match.group(1))
                            new_num = num + 1 if op == "+" else max(0, num - 1)
                            lines[idx] = line.replace(f"{op} {num}", f"{op} {new_num}", 1)
                            break

            elif mod_type == "change_operator":
                operators = {"+": "-", "-": "+", "*": "/", "/": "*", "and": "or", "or": "and"}
                for op, new_op in operators.items():
                    if f" {op} " in line:
                        lines[idx] = line.replace(f" {op} ", f" {new_op} ", 1)
                        break

            elif mod_type == "reverse_logic":
                if " and " in line:
                    parts = line.split(" and ")
                    lines[idx] = line.replace(" and ".join(parts), " or ".join(parts), 1)
                elif " or " in line:
                    parts = line.split(" or ")
                    lines[idx] = line.replace(" or ".join(parts), " and ".join(parts), 1)

        else:
            # No conditionals: fall back to corrupting an assignment.
            assignments = []
            for i, line in enumerate(lines):
                if "=" in line and "==" not in line and "!=" not in line:
                    assignments.append((i, line))

            if not assignments:
                # Nothing safe to mutate; leave the code unchanged.
                return

            idx, line = random.choice(assignments)

            if "+" in line:
                lines[idx] = line.replace("+", "-", 1)
            elif "-" in line:
                lines[idx] = line.replace("-", "+", 1)
            elif "*" in line:
                lines[idx] = line.replace("*", "/", 1)
            elif "/" in line:
                lines[idx] = line.replace("/", "*", 1)
            else:
                match = re.search(r'=\s*(\d+)', line)
                if match:
                    num = int(match.group(1))
                    new_num = num + random.choice([-1, 1]) * random.randint(1, 3)
                    lines[idx] = line.replace(f"= {num}", f"= {new_num}", 1)

        modified_code = '\n'.join(lines)
        state.code_context["code"] = modified_code

        if "bugs" not in state.code_context:
            state.code_context["bugs"] = []

        state.code_context["bugs"].append({
            "type": "logical",
            "line": idx + 1,
            "description": f"Logical error introduced in line {idx + 1}: {mod_type}"
        })
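
    # Example of the kind of mutation this produces (illustrative):
    #
    #     before:  if len(items) >= limit:
    #     after:   if len(items) <= limit:   # "change_comparison"
    #
    # The bug record stored in code_context["bugs"] lets later evolutions and
    # feedback refer back to what was injected and where.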

    def _add_performance_issue(self, state: ProblemState, solution: str, parsed_solution: ast.Module) -> None:
        """
        Add a performance issue to the solution code.

        Args:
            state: The problem state to modify
            solution: The current solution
            parsed_solution: The parsed AST of the solution
        """
        lines = solution.split('\n')

        # Find loops to degrade.
        loops = []
        for i, line in enumerate(lines):
            if re.search(r'\bfor\b|\bwhile\b', line):
                loops.append((i, line))

        if loops:
            idx, line = random.choice(loops)

            mod_type = random.choice([
                "add_nested_loop",
                "replace_efficient_operation",
                "add_redundant_computation"
            ])

            if mod_type == "add_nested_loop":
                indent = len(line) - len(line.lstrip())
                indent_str = ' ' * indent
                loop_body_indent = indent_str + '    '

                # Find where the loop body ends.
                end_idx = idx + 1
                while end_idx < len(lines) and (not lines[end_idx].strip() or len(lines[end_idx]) - len(lines[end_idx].lstrip()) > indent):
                    end_idx += 1

                insert_pos = end_idx
                lines.insert(insert_pos, f"{loop_body_indent}for _ in range(100):  # Unnecessary loop")
                lines.insert(insert_pos + 1, f"{loop_body_indent}    pass")

            elif mod_type == "replace_efficient_operation":
                # Swap an append/extend for a slower list concatenation.
                for i in range(idx + 1, min(idx + 10, len(lines))):
                    if "append" in lines[i] or "extend" in lines[i]:
                        indent = len(lines[i]) - len(lines[i].lstrip())
                        indent_str = ' ' * indent
                        match = re.search(r'(\w+)\.(append|extend)', lines[i])
                        if match:
                            list_name = match.group(1)
                            operation = match.group(2)
                            item = lines[i].split(f"{list_name}.{operation}(")[1].split(")")[0]

                            if operation == "append":
                                lines[i] = f"{indent_str}{list_name} = {list_name} + [{item}]  # Less efficient than append"
                            elif operation == "extend":
                                lines[i] = f"{indent_str}{list_name} = {list_name} + {item}  # Less efficient than extend"
                            break

            elif mod_type == "add_redundant_computation":
                # Insert pointless work at the top of the loop body.
                if idx + 1 < len(lines):
                    body_indent = len(lines[idx + 1]) - len(lines[idx + 1].lstrip())
                    body_indent_str = ' ' * body_indent

                    lines.insert(idx + 1, f"{body_indent_str}temp = []  # Redundant computation")
                    lines.insert(idx + 2, f"{body_indent_str}for i in range(1000):")
                    lines.insert(idx + 3, f"{body_indent_str}    temp.append(i)")
                    lines.insert(idx + 4, f"{body_indent_str}    temp.sort()  # Unnecessary sort in each iteration")

        else:
            # No loops: degrade a function body instead.
            function_defs = []
            for i, line in enumerate(lines):
                if line.strip().startswith("def "):
                    function_defs.append((i, line))

            if not function_defs:
                # Nothing to degrade; leave the code unchanged.
                return

            idx, line = random.choice(function_defs)

            if idx + 1 < len(lines):
                body_indent = len(lines[idx + 1]) - len(lines[idx + 1].lstrip())
                body_indent_str = ' ' * body_indent

                lines.insert(idx + 1, f"{body_indent_str}# Inefficient data structure usage")
                lines.insert(idx + 2, f"{body_indent_str}data = []")
                lines.insert(idx + 3, f"{body_indent_str}for i in range(1000):")
                lines.insert(idx + 4, f"{body_indent_str}    data.append(i)")
                lines.insert(idx + 5, f"{body_indent_str}    # Inefficient search operation")
                lines.insert(idx + 6, f"{body_indent_str}    if i in data:  # Linear search instead of using a set")
                lines.insert(idx + 7, f"{body_indent_str}        pass")

        modified_code = '\n'.join(lines)
        state.code_context["code"] = modified_code

        if "bugs" not in state.code_context:
            state.code_context["bugs"] = []

        state.code_context["bugs"].append({
            "type": "performance",
            "line": idx + 1,
            "description": f"Performance issue introduced around line {idx + 1}"
        })

    def _add_edge_case_issue(self, state: ProblemState, solution: str, parsed_solution: ast.Module) -> None:
        """
        Add an edge case issue to the solution code.

        Args:
            state: The problem state to modify
            solution: The current solution
            parsed_solution: The parsed AST of the solution
        """
        lines = solution.split('\n')

        # Locate function definitions and their line ranges.
        functions = []
        current_func = None
        func_start = None
        for i, line in enumerate(lines):
            if line.strip().startswith("def "):
                if current_func:
                    functions.append((func_start, i - 1, current_func))
                current_func = line.strip()[4:].split("(")[0]
                func_start = i
            elif i == len(lines) - 1 and current_func:
                functions.append((func_start, i, current_func))

        if not functions:
            # No functions to target; leave the code unchanged.
            return

        start_idx, end_idx, func_name = random.choice(functions)

        mod_type = random.choice([
            "remove_boundary_check",
            "introduce_zero_division",
            "handling_empty_input",
            "type_assumption"
        ])

        if mod_type == "remove_boundary_check":
            for i in range(start_idx, end_idx + 1):
                if re.search(r'if\s+.*(?:len|count|size|length|empty|<=|>=|<|>|!=)', lines[i]):
                    # Record the indentation before commenting the line out.
                    indent = len(lines[i]) - len(lines[i].lstrip())
                    body_indent = indent + 4
                    lines[i] = f"# {lines[i]}  # Boundary check removed"

                    # Comment out the check's body as well.
                    j = i + 1
                    while j <= end_idx and (not lines[j].strip() or len(lines[j]) - len(lines[j].lstrip()) >= body_indent):
                        lines[j] = f"# {lines[j]}"
                        j += 1
                    break

        elif mod_type == "introduce_zero_division":
            for i in range(start_idx, end_idx + 1):
                if "/" in lines[i] and "try" not in lines[i] and "except" not in lines[i]:
                    if re.search(r'if\s+.*(?:!=\s*0|>\s*0)', lines[i]):
                        lines[i] = f"# {lines[i]}  # Denominator check removed"
                    else:
                        match = re.search(r'(\w+)\s*/\s*(\w+)', lines[i])
                        if match:
                            denominator = match.group(2)
                            indent = len(lines[i]) - len(lines[i].lstrip())
                            indent_str = ' ' * indent
                            lines.insert(i, f"{indent_str}if random.random() < 0.1:  # Introduce potential zero division")
                            lines.insert(i + 1, f"{indent_str}    {denominator} = 0")
                    break

        elif mod_type == "handling_empty_input":
            params = re.search(r'def\s+\w+\s*\((.*?)\)', lines[start_idx])
            if params and params.group(1):
                param_list = [p.strip() for p in params.group(1).split(",")]
                if param_list:
                    param = param_list[0].split("=")[0].strip()

                    for i in range(start_idx + 1, end_idx + 1):
                        if re.search(rf'if\s+.*(?:not\s+{param}|len\s*\(\s*{param}\s*\)\s*==\s*0)', lines[i]):
                            # Record the indentation before commenting the line out.
                            indent = len(lines[i]) - len(lines[i].lstrip())
                            body_indent = indent + 4
                            lines[i] = f"# {lines[i]}  # Empty input check removed"

                            j = i + 1
                            while j <= end_idx and (not lines[j].strip() or len(lines[j]) - len(lines[j].lstrip()) >= body_indent):
                                lines[j] = f"# {lines[j]}"
                                j += 1
                            break

        elif mod_type == "type_assumption":
            params = re.search(r'def\s+\w+\s*\((.*?)\)', lines[start_idx])
            if params and params.group(1):
                param_list = [p.strip() for p in params.group(1).split(",")]
                if param_list:
                    param = param_list[0].split("=")[0].strip()

                    type_check_found = False
                    for i in range(start_idx + 1, end_idx + 1):
                        if re.search(rf'(?:isinstance|type)\s*\(\s*{param}\s*,', lines[i]):
                            lines[i] = f"# {lines[i]}  # Type check removed"
                            type_check_found = True
                            break

                    if not type_check_found:
                        # Infer the body indentation from the first non-blank line.
                        indent = 4
                        for i in range(start_idx + 1, min(start_idx + 5, end_idx + 1)):
                            if lines[i].strip():
                                indent = len(lines[i]) - len(lines[i].lstrip())
                                break

                        indent_str = ' ' * indent

                        lines.insert(start_idx + 1, f"{indent_str}# Assuming {param} is a specific type without checking")
                        lines.insert(start_idx + 2, f"{indent_str}{param}_length = len({param})  # Will fail if {param} doesn't support len()")

        modified_code = '\n'.join(lines)
        state.code_context["code"] = modified_code

        if "bugs" not in state.code_context:
            state.code_context["bugs"] = []

        state.code_context["bugs"].append({
            "type": "edge_case",
            "line": start_idx + 1,
            "description": f"Edge case issue introduced in function '{func_name}': {mod_type}"
        })

    def _generate_new_requirement(self, state: ProblemState, solution: str) -> Dict[str, Any]:
        """
        Generate a new requirement based on the current state and solution.

        Args:
            state: The current problem state
            solution: The current solution

        Returns:
            A new requirement dictionary
        """
        function_names = re.findall(r'def\s+(\w+)', solution)
        variable_names = re.findall(r'(\w+)\s*=', solution)

        req_type = random.choice([
            "edge_case_handling",
            "performance_improvement",
            "error_handling",
            "type_checking",
            "feature_addition"
        ])

        if req_type == "edge_case_handling":
            if function_names:
                func_name = random.choice(function_names)
                edge_cases = [
                    "empty input",
                    "negative values",
                    "zero values",
                    "extremely large values",
                    "special characters",
                    "duplicate values"
                ]
                edge_case = random.choice(edge_cases)
                return {
                    "type": "edge_case_handling",
                    "description": f"The function '{func_name}' should handle {edge_case} correctly.",
                    "difficulty": random.uniform(0.3, 0.7)
                }

        elif req_type == "performance_improvement":
            return {
                "type": "performance_improvement",
                "description": "The solution should be optimized to run in O(n) time or better.",
                "difficulty": random.uniform(0.4, 0.8)
            }

        elif req_type == "error_handling":
            error_types = [
                "invalid input",
                "division by zero",
                "file not found",
                "network timeout",
                "permission denied"
            ]
            error_type = random.choice(error_types)
            return {
                "type": "error_handling",
                "description": f"The code should handle {error_type} errors gracefully.",
                "difficulty": random.uniform(0.2, 0.6)
            }

        elif req_type == "type_checking":
            if function_names:
                func_name = random.choice(function_names)
                return {
                    "type": "type_checking",
                    "description": f"The function '{func_name}' should validate input types before processing.",
                    "difficulty": random.uniform(0.1, 0.5)
                }

        elif req_type == "feature_addition":
            features = [
                "logging capability",
                "progress tracking",
                "caching for repeated operations",
                "parameter validation",
                "configuration options"
            ]
            feature = random.choice(features)
            return {
                "type": "feature_addition",
                "description": f"Add {feature} to the solution.",
                "difficulty": random.uniform(0.3, 0.7)
            }

        return {
            "type": "general_improvement",
            "description": "Improve the overall code quality and readability.",
            "difficulty": random.uniform(0.1, 0.4)
        }

    def _modify_requirement(self, requirement: Dict[str, Any], state: ProblemState, solution: str) -> Dict[str, Any]:
        """
        Modify an existing requirement to make it more challenging.

        Args:
            requirement: The requirement to modify
            state: The current problem state
            solution: The current solution

        Returns:
            The modified requirement
        """
        modified_req = copy.deepcopy(requirement)

        modified_req["difficulty"] = min(1.0, requirement.get("difficulty", 0.3) + random.uniform(0.1, 0.3))

        if requirement["type"] == "edge_case_handling":
            modified_req["description"] += " Additionally, it should handle very large inputs efficiently."

        elif requirement["type"] == "performance_improvement":
            modified_req["description"] = modified_req["description"].replace("O(n)", "O(log n)")

        elif requirement["type"] == "error_handling":
            modified_req["description"] += " And provide detailed error messages for debugging."

        elif requirement["type"] == "type_checking":
            modified_req["description"] += " And automatically convert types when possible."

        elif requirement["type"] == "feature_addition":
            modified_req["description"] += " Ensure this feature is configurable via parameters."

        else:
            modified_req["description"] += " The code should also be well-documented with comments."

        return modified_req

    def _add_edge_case_requirement(self, state: ProblemState) -> None:
        """
        Add a requirement for handling edge cases.

        Args:
            state: The problem state to modify
        """
        edge_cases = [
            "empty collections",
            "null/None values",
            "boundary values (min/max)",
            "negative numbers",
            "special characters",
            "Unicode characters",
            "very large inputs",
            "malformed input"
        ]

        edge_case = random.choice(edge_cases)

        state.requirements.append({
            "type": "edge_case_handling",
            "description": f"The solution must correctly handle {edge_case}.",
            "difficulty": random.uniform(0.3, 0.7)
        })

        if "tests" in state.code_context:
            test_template = self._generate_edge_case_test(edge_case, state.code_context)
            if test_template:
                state.code_context["tests"].append({
                    "name": f"test_edge_case_{len(state.code_context['tests'])}",
                    "content": test_template,
                    "description": f"Test handling of {edge_case}"
                })

    def _increase_data_volume(self, state: ProblemState, solution: str) -> None:
        """
        Modify the problem to require handling larger data volumes.

        Args:
            state: The problem state to modify
            solution: The current solution
        """
        state.requirements.append({
            "type": "scalability",
            "description": "The solution must efficiently handle large datasets (10,000+ items).",
            "difficulty": random.uniform(0.5, 0.8)
        })

        if "tests" in state.code_context:
            for i, test in enumerate(state.code_context["tests"]):
                content = test["content"]

                for pattern, replacement in [
                    (r'\[[^\]]{0,50}\]', '[random.randint(0, 1000) for _ in range(10000)]'),
                    (r'range\(\d+\)', 'range(10000)'),
                    (r'"[^"]{0,20}"', '"' + 'a' * 10000 + '"')
                ]:
                    match = re.search(pattern, content)
                    if match and random.random() < 0.3:
                        content = content.replace(match.group(0), replacement, 1)
                        break

                state.code_context["tests"][i]["content"] = content
                state.code_context["tests"][i]["description"] = f"{test.get('description', 'Test')} (with large data)"

    def _add_performance_constraint(self, state: ProblemState, solution: str) -> None:
        """
        Add a performance constraint to the problem.

        Args:
            state: The problem state to modify
            solution: The current solution
        """
        constraints = [
            "linear time complexity (O(n))",
            "logarithmic time complexity (O(log n))",
            "constant memory usage (O(1) space)",
            "execution time under 100ms for large inputs",
            "minimal function calls"
        ]

        constraint = random.choice(constraints)

        state.requirements.append({
            "type": "performance",
            "description": f"The solution must achieve {constraint}.",
            "difficulty": random.uniform(0.6, 0.9)
        })

        if "tests" in state.code_context:
            perf_test = self._generate_performance_test(constraint, state.code_context)
            if perf_test:
                state.code_context["tests"].append({
                    "name": f"test_performance_{len(state.code_context['tests'])}",
                    "content": perf_test,
                    "description": f"Test {constraint}"
                })

    def _expand_functionality(self, state: ProblemState, solution: str) -> None:
        """
        Expand the required functionality of the solution.

        Args:
            state: The problem state to modify
            solution: The current solution
        """
        expansions = [
            "support for different input types",
            "parameterized behavior",
            "additional output formats",
            "flexible error handling",
            "integration with external systems"
        ]

        expansion = random.choice(expansions)

        state.requirements.append({
            "type": "functionality",
            "description": f"Expand the solution to include {expansion}.",
            "difficulty": random.uniform(0.4, 0.8)
        })

        if "tests" in state.code_context:
            test_template = self._generate_functionality_test(expansion, state.code_context)
            if test_template:
                state.code_context["tests"].append({
                    "name": f"test_expanded_functionality_{len(state.code_context['tests'])}",
                    "content": test_template,
                    "description": f"Test {expansion}"
                })

    def _generate_default_test(self) -> str:
        """
        Generate a default test based on the current problem state.

        Returns:
            A default test script
        """
        return """
import unittest
import sys
import os

# Add the directory containing the solution to the path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Import the solution
from solution import *

class DefaultTest(unittest.TestCase):
    def test_basic_functionality(self):
        # A basic test that should pass if the solution is correct
        self.assertTrue(True, "Basic assertion failed")

    def test_expected_output(self):
        # Test expected output of main functions
        # This will need to be updated based on the specific problem
        pass

if __name__ == '__main__':
    unittest.main()
"""
| | def _generate_edge_case_test(self, edge_case: str, code_context: Dict[str, Any]) -> str: |
| | """ |
| | Generate a test for an edge case. |
| | |
| | Args: |
| | edge_case: The edge case to test |
| | code_context: The code context containing information about the problem |
| | |
| | Returns: |
| | A test script for the edge case |
| | """ |
| | |
| | function_names = [] |
| | if "code" in code_context: |
| | function_names = re.findall(r'def\s+(\w+)', code_context["code"]) |
| | |
| | if not function_names: |
| | return None |
| | |
| | |
| | function_name = random.choice(function_names) |
| | |
| | |
| | if edge_case == "empty collections": |
| | return f""" |
| | import unittest |
| | import sys |
| | import os |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import {function_name} |
| | |
| | class EmptyCollectionTest(unittest.TestCase): |
| | def test_empty_input(self): |
| | # Test with empty list |
| | result = {function_name}([]) |
| | self.assertIsNotNone(result, "Function should handle empty list") |
| | |
| | # Test with empty string |
| | result = {function_name}("") |
| | self.assertIsNotNone(result, "Function should handle empty string") |
| | |
| | # Test with empty dict |
| | result = {function_name}({{}}) |
| | self.assertIsNotNone(result, "Function should handle empty dict") |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | elif edge_case == "null/None values": |
| | return f""" |
| | import unittest |
| | import sys |
| | import os |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import {function_name} |
| | |
| | class NoneValueTest(unittest.TestCase): |
| | def test_none_input(self): |
| | # Test with None as input |
| | result = {function_name}(None) |
| | self.assertIsNotNone(result, "Function should handle None input") |
| | |
| | # Test with list containing None |
| | result = {function_name}([1, None, 3]) |
| | self.assertIsNotNone(result, "Function should handle list with None values") |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | elif edge_case == "boundary values (min/max)": |
| | return f""" |
| | # recursive_swe_bench/task_generators/bug_fixing.py (completion) |
| | |
| | import unittest |
| | import sys |
| | import os |
| | import sys |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import {function_name} |
| | |
| | class BoundaryValueTest(unittest.TestCase): |
| | def test_min_max_values(self): |
| | # Test with minimum integer |
| | min_int = -sys.maxsize - 1 |
| | result = {function_name}(min_int) |
| | self.assertIsNotNone(result, "Function should handle minimum integer") |
| | |
| | # Test with maximum integer |
| | max_int = sys.maxsize |
| | result = {function_name}(max_int) |
| | self.assertIsNotNone(result, "Function should handle maximum integer") |
| | |
| | # Test with very large list |
| | large_list = list(range(10000)) |
| | result = {function_name}(large_list) |
| | self.assertIsNotNone(result, "Function should handle very large inputs") |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | elif edge_case == "negative numbers": |
| | return f""" |
| | import unittest |
| | import sys |
| | import os |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import {function_name} |
| | |
| | class NegativeNumberTest(unittest.TestCase): |
| | def test_negative_numbers(self): |
| | # Test with negative number |
| | result = {function_name}(-1) |
| | self.assertIsNotNone(result, "Function should handle negative numbers") |
| | |
| | # Test with list of negative numbers |
| | result = {function_name}([-1, -2, -3]) |
| | self.assertIsNotNone(result, "Function should handle lists of negative numbers") |
| | |
| | # Test with mixed positive and negative |
| | result = {function_name}([-1, 0, 1]) |
| | self.assertIsNotNone(result, "Function should handle mixed positive and negative") |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | else: |
| | |
| | return f""" |
| | import unittest |
| | import sys |
| | import os |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import {function_name} |
| | |
| | class EdgeCaseTest(unittest.TestCase): |
| | def test_edge_case_{re.sub(r'[^0-9a-zA-Z_]', '_', edge_case)}(self): |
| | # Test edge case: {edge_case} |
| | # This is a placeholder test that needs to be customized for the specific edge case |
| | self.assertTrue(True, "Edge case test not implemented") |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | |
| | def _generate_performance_test(self, constraint: str, code_context: Dict[str, Any]) -> str: |
| | """ |
| | Generate a performance test based on a constraint. |
| | |
| | Args: |
| | constraint: The performance constraint |
| | code_context: The code context containing information about the problem |
| | |
| | Returns: |
| | A test script for the performance constraint |
| | """ |
| | |
| | function_names = [] |
| | if "code" in code_context: |
| | function_names = re.findall(r'def\s+(\w+)', code_context["code"]) |
| | |
| | if not function_names: |
| | return None |
| | |
| | |
| | function_name = random.choice(function_names) |
| | |
| | if "time complexity" in constraint: |
| | return f""" |
| | import unittest |
| | import sys |
| | import os |
| | import time |
| | import random |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import {function_name} |
| | |
| | class PerformanceTest(unittest.TestCase): |
| | def test_time_complexity(self): |
| | # Test for {constraint} |
| | sizes = [100, 1000, 10000] |
| | times = [] |
| | |
| | for size in sizes: |
| | # Generate input of the given size |
| | input_data = [random.randint(0, 1000) for _ in range(size)] |
| | |
| | # Measure execution time |
| | start_time = time.time() |
| | {function_name}(input_data) |
| | end_time = time.time() |
| | |
| | times.append(end_time - start_time) |
| | |
| | # Check if time grows appropriately |
| | # For O(n), time should grow linearly with input size |
| | # For O(log n), time should grow logarithmically |
| | # This is a simplified check and might need adjustment |
| | if "log n" in "{constraint}": |
| | # For logarithmic time, the ratio of times should decrease |
| | ratio1 = times[1] / times[0] |
| | ratio2 = times[2] / times[1] |
| | self.assertLess(ratio2, ratio1 * 1.5, |
| | f"Growth rate appears super-logarithmic: {times}") |
| | else: # Assume linear or better |
| | # For linear time, the ratio of times should be roughly equal to ratio of sizes |
| | ratio1 = times[1] / times[0] |
| | size_ratio1 = sizes[1] / sizes[0] |
| | |
| | ratio2 = times[2] / times[1] |
| | size_ratio2 = sizes[2] / sizes[1] |
| | |
| | self.assertLess(ratio1, size_ratio1 * 1.5, |
| | f"First growth rate appears super-linear: {times}") |
| | self.assertLess(ratio2, size_ratio2 * 1.5, |
| | f"Second growth rate appears super-linear: {times}") |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | elif "execution time" in constraint: |
| | return f""" |
| | import unittest |
| | import sys |
| | import os |
| | import time |
| | import random |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import {function_name} |
| | |
| | class PerformanceTest(unittest.TestCase): |
| | def test_execution_time(self): |
| | # Test for {constraint} |
| | # Generate a large input |
| | input_data = [random.randint(0, 1000) for _ in range(10000)] |
| | |
| | # Measure execution time |
| | start_time = time.time() |
| | {function_name}(input_data) |
| | end_time = time.time() |
| | |
| | execution_time = (end_time - start_time) * 1000 # Convert to ms |
| | |
| | self.assertLess(execution_time, 100, |
| | f"Execution time exceeded 100ms: {execution_time:.2f}ms") |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | elif "memory usage" in constraint: |
| | return f""" |
| | import unittest |
| | import sys |
| | import os |
| | import psutil |
| | import random |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import {function_name} |
| | |
| | class MemoryUsageTest(unittest.TestCase): |
| | def test_memory_usage(self): |
| | # Test for {constraint} |
| | # Note: This is an approximate test and may not be accurate in all environments |
| | |
| | # Get current process |
| | process = psutil.Process(os.getpid()) |
| | |
| | # Measure memory before |
| | memory_before = process.memory_info().rss / 1024 / 1024 # MB |
| | |
| | # Generate a large input |
| | input_data = [random.randint(0, 1000) for _ in range(100000)] |
| | |
| | # Run function |
| | {function_name}(input_data) |
| | |
| | # Measure memory after |
| | memory_after = process.memory_info().rss / 1024 / 1024 # MB |
| | |
| | # Calculate memory usage |
| | memory_used = memory_after - memory_before |
| | |
| | # A crude approximation, adjust as needed |
| | self.assertLess(memory_used, 10, |
| | f"Memory usage seems high: {memory_used:.2f}MB") |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | else: |
| | |
| | return f""" |
| | import unittest |
| | import sys |
| | import os |
| | import time |
| | import random |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import {function_name} |
| | |
| | class PerformanceTest(unittest.TestCase): |
| | def test_performance(self): |
| | # Test for {constraint} |
| | # This is a placeholder test that needs to be customized for the specific constraint |
| | |
| | # Generate a large input |
| | input_data = [random.randint(0, 1000) for _ in range(10000)] |
| | |
| | # Measure execution time |
| | start_time = time.time() |
| | {function_name}(input_data) |
| | end_time = time.time() |
| | |
| | execution_time = end_time - start_time |
| | |
| | # Just log the time for now |
| | print(f"Execution time: {execution_time:.4f} seconds") |
| | self.assertTrue(True, "Performance test completed") |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | |
| | def _generate_functionality_test(self, expansion: str, code_context: Dict[str, Any]) -> str: |
| | """ |
| | Generate a test for expanded functionality. |
| | |
| | Args: |
| | expansion: The functionality expansion |
| | code_context: The code context containing information about the problem |
| | |
| | Returns: |
| | A test script for the expanded functionality |
| | """ |
| | |
| | function_names = [] |
| | if "code" in code_context: |
| | function_names = re.findall(r'def\s+(\w+)', code_context["code"]) |
| | |
| | if not function_names: |
| | return None |
| | |
| | |
| | function_name = random.choice(function_names) |
| | |
| | if "different input types" in expansion: |
| | return f""" |
| | import unittest |
| | import sys |
| | import os |
| | import json |
| | from collections import namedtuple |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import {function_name} |
| | |
| | class InputTypesTest(unittest.TestCase): |
| | def test_different_input_types(self): |
| | # Test with different types of inputs |
| | |
| | # Test with list |
| | list_input = [1, 2, 3] |
| | list_result = {function_name}(list_input) |
| | self.assertIsNotNone(list_result, "Function should handle list input") |
| | |
| | # Test with tuple |
| | tuple_input = (1, 2, 3) |
| | tuple_result = {function_name}(tuple_input) |
| | self.assertIsNotNone(tuple_result, "Function should handle tuple input") |
| | |
| | # Test with set |
| | set_input = {{1, 2, 3}} |
| | set_result = {function_name}(set_input) |
| | self.assertIsNotNone(set_result, "Function should handle set input") |
| | |
| | # Test with dictionary |
| | dict_input = {{"a": 1, "b": 2, "c": 3}} |
| | dict_result = {function_name}(dict_input) |
| | self.assertIsNotNone(dict_result, "Function should handle dictionary input") |
| | |
| | # Test with JSON string |
| | json_input = '{{"data": [1, 2, 3]}}' |
| | json_result = {function_name}(json_input) |
| | self.assertIsNotNone(json_result, "Function should handle JSON string") |
| | |
| | # Test with custom object |
| | Point = namedtuple('Point', ['x', 'y']) |
| | obj_input = Point(1, 2) |
| | obj_result = {function_name}(obj_input) |
| | self.assertIsNotNone(obj_result, "Function should handle custom object") |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | elif "parameterized behavior" in expansion: |
| | return f""" |
| | import unittest |
| | import sys |
| | import os |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import {function_name} |
| | |
| | class ParameterizedTest(unittest.TestCase): |
| | def test_parameterized_behavior(self): |
| | # Test function with different parameters |
| | |
| | # Base case with default parameters |
| | base_input = [1, 2, 3] |
| | base_result = {function_name}(base_input) |
| | |
| | # The function should now accept additional parameters |
| | # These are example parameters, adjust based on the specific function |
| | |
| | # With sorting parameter |
| | try: |
| | sorted_result = {function_name}(base_input, sort=True) |
| | self.assertIsNotNone(sorted_result, "Function should handle sort parameter") |
| | except TypeError as e: |
| | self.fail(f"Function does not support sort parameter: {{e}}") |
| | |
| | # With filtering parameter |
| | try: |
| | filtered_result = {function_name}(base_input, filter_fn=lambda x: x > 1) |
| | self.assertIsNotNone(filtered_result, "Function should handle filter_fn parameter") |
| | except TypeError as e: |
| | self.fail(f"Function does not support filter_fn parameter: {{e}}") |
| | |
| | # With formatting parameter |
| | try: |
| | formatted_result = {function_name}(base_input, format="json") |
| | self.assertIsNotNone(formatted_result, "Function should handle format parameter") |
| | except TypeError as e: |
| | self.fail(f"Function does not support format parameter: {{e}}") |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | elif "additional output formats" in expansion: |
| | return f""" |
| | import unittest |
| | import sys |
| | import os |
| | import json |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import {function_name} |
| | |
| | class OutputFormatsTest(unittest.TestCase): |
| | def test_output_formats(self): |
| | # Test function with different output formats |
| | input_data = [1, 2, 3] |
| | |
| | # Original format |
| | original_result = {function_name}(input_data) |
| | |
| | # The function should now support different output formats |
| | # These are example formats, adjust based on the specific function |
| | |
| | # JSON format |
| | try: |
| | json_result = {function_name}(input_data, format="json") |
| | # Check if it's valid JSON |
| | try: |
| | json_obj = json.loads(json_result) if isinstance(json_result, str) else json_result |
| | self.assertIsNotNone(json_obj, "JSON result should be valid") |
| | except json.JSONDecodeError: |
| | self.fail("JSON result is not valid") |
| | except TypeError as e: |
| | self.fail(f"Function does not support JSON format: {{e}}") |
| | |
| | # CSV format |
| | try: |
| | csv_result = {function_name}(input_data, format="csv") |
| | self.assertIsNotNone(csv_result, "CSV result should not be None") |
| | if isinstance(csv_result, str): |
| | self.assertIn(",", csv_result, "CSV result should contain commas") |
| | except TypeError as e: |
| | self.fail(f"Function does not support CSV format: {{e}}") |
| | |
| | # XML format |
| | try: |
| | xml_result = {function_name}(input_data, format="xml") |
| | self.assertIsNotNone(xml_result, "XML result should not be None") |
| | if isinstance(xml_result, str): |
| | self.assertIn("<", xml_result, "XML result should contain tags") |
| | self.assertIn(">", xml_result, "XML result should contain tags") |
| | except TypeError as e: |
| | self.fail(f"Function does not support XML format: {{e}}") |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | else: |
| | |
| | return f""" |
| | import unittest |
| | import sys |
| | import os |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import {function_name} |
| | |
| | class ExpandedFunctionalityTest(unittest.TestCase): |
| | def test_expanded_functionality(self): |
| | # Test for {expansion} |
| | # This is a placeholder test that needs to be customized for the specific expansion |
| | |
| | # Basic test to verify the function exists |
| | input_data = [1, 2, 3] |
| | result = {function_name}(input_data) |
| | self.assertIsNotNone(result, "Function should return a result") |
| | |
| | # You need to add specific tests for the expanded functionality |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | |
| | def _calculate_adaptation_vector(self, solution: str, result: EvaluationResult, feedback: Feedback) -> List[float]: |
| | """ |
| | Calculate an adaptation vector based on the solution, result, and feedback. |
| | |
| | The adaptation vector encodes how the problem should evolve in future iterations, |
| | capturing dimensions like difficulty, bug type emphasis, and feedback focus. |
| | |
| | Args: |
| | solution: The current solution |
| | result: The evaluation results |
| | feedback: The feedback provided |
| | |
| | Returns: |
| | An adaptation vector (list of floats) |
| | """ |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | adaptation_vector = [0.0] * 5 |
| | |
| | |
| | if result.score > 0.95: |
| | adaptation_vector[0] = 0.2 |
| | elif result.score > 0.8: |
| | adaptation_vector[0] = 0.1 |
| | elif result.score > 0.6: |
| | adaptation_vector[0] = 0.0 |
| | elif result.score > 0.4: |
| | adaptation_vector[0] = -0.1 |
| | else: |
| | adaptation_vector[0] = -0.2 |
| | |
| | |
| | syntax_issues = sum(1 for issue in feedback.issues if issue.get("error_type") == "syntax") |
| | logical_issues = sum(1 for issue in feedback.issues if issue.get("type") == "test_failure") |
| | |
| | if syntax_issues > logical_issues: |
| | adaptation_vector[1] = -0.1 |
| | elif logical_issues > syntax_issues: |
| | adaptation_vector[1] = 0.1 |
| | |
| | |
| | if result.metrics and "execution_time" in result.metrics: |
| | if result.metrics["execution_time"] > self.config.get("performance_threshold", 1.0): |
| | adaptation_vector[2] = 0.2 |
| | else: |
| | adaptation_vector[2] = -0.1 |
| | |
| | |
| | if result.test_results: |
| | edge_case_failures = sum(1 for test_name, test_result in result.test_results.items() |
| | if not test_result["passed"] and "edge" in test_name.lower()) |
| | if edge_case_failures > 0: |
| | adaptation_vector[3] = 0.2 |
| | else: |
| | adaptation_vector[3] = 0.0 |
| | |
| | |
| | current_requirements = len(self.state.requirements) |
| | if current_requirements < 3: |
| | adaptation_vector[4] = 0.1 |
| | elif current_requirements >= 5: |
| | adaptation_vector[4] = -0.1 |
| | |
| | return adaptation_vector |
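| | # Worked example (illustrative only): with score=0.9, more logical than |
| | # syntax issues, an execution time over the threshold, one edge-case |
| | # failure, and two requirements, the vector comes out as |
| | # [0.1, 0.1, 0.2, 0.2, 0.1] - i.e. harden the task on every axis. How |
| | # each component is consumed is left to the configured evolution |
| | # strategies. |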
| |
|
| |
|
| | class DefaultTestRunner: |
| | """Default test runner for evaluating bug fixes.""" |
| | |
| | def run_tests(self, solution_file: Path, test_files: List[Path], code_context: Dict[str, Any]) -> Dict[str, Any]: |
| | """ |
| | Run tests against a solution file. |
| | |
| | Args: |
| | solution_file: Path to the solution file |
| | test_files: List of test file paths |
| | code_context: Context information about the code |
| | |
| | Returns: |
| | Dictionary of test results |
| | """ |
| | |
| | results = { |
| | "all_passed": True, |
| | "passed_tests": 0, |
| | "total_tests": 0, |
| | "tests": {}, |
| | "execution": { |
| | "success": True, |
| | "error": None, |
| | "stdout": None, |
| | "stderr": None |
| | }, |
| | "execution_time": 0.0 |
| | } |
| | |
| | |
| | try: |
| | |
| | if not solution_file.exists(): |
| | results["execution"]["success"] = False |
| | results["execution"]["error"] = "Solution file not found" |
| | results["all_passed"] = False |
| | return results |
| | |
| | |
| | import sys |
| | import importlib.util |
| | sys.path.insert(0, str(solution_file.parent)) |
| | spec = importlib.util.spec_from_file_location("solution", solution_file) |
| | solution_module = importlib.util.module_from_spec(spec) |
| | spec.loader.exec_module(solution_module) |
| | |
| | |
| | if "required_functions" in code_context: |
| | for func_name in code_context["required_functions"]: |
| | if not hasattr(solution_module, func_name): |
| | results["execution"]["success"] = False |
| | results["execution"]["error"] = f"Required function '{func_name}' not found" |
| | results["all_passed"] = False |
| | return results |
| | |
| | except Exception as e: |
| | results["execution"]["success"] = False |
| | results["execution"]["error"] = str(e) |
| | results["all_passed"] = False |
| | return results |
| | |
| | |
| | for test_file in test_files: |
| | |
| | if not test_file.exists(): |
| | continue |
| | |
| | |
| | import unittest |
| | import io |
| | from contextlib import redirect_stdout, redirect_stderr |
| | |
| | |
| | loader = unittest.TestLoader() |
| | try: |
| | tests = loader.discover(str(test_file.parent), pattern=test_file.name) |
| | |
| | |
| | test_cases = 0 |
| | for suite in tests: |
| | for test_case in suite: |
| | test_cases += test_case.countTestCases() |
| | |
| | results["total_tests"] += test_cases |
| | |
| | |
| | runner = unittest.TextTestRunner(verbosity=2) |
| | |
| | |
| | stdout_buffer = io.StringIO() |
| | stderr_buffer = io.StringIO() |
| | |
| | with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer): |
| | test_result = runner.run(tests) |
| | |
| | stdout = stdout_buffer.getvalue() |
| | stderr = stderr_buffer.getvalue() |
| | |
| | |
| | if not test_result.wasSuccessful(): |
| | results["all_passed"] = False |
| | |
| | |
| | passed_tests = test_cases - len(test_result.failures) - len(test_result.errors) |
| | results["passed_tests"] += passed_tests |
| | |
| | |
| | test_name = test_file.stem |
| | results["tests"][test_name] = { |
| | "passed": test_result.wasSuccessful(), |
| | "failures": len(test_result.failures), |
| | "errors": len(test_result.errors), |
| | "skipped": len(test_result.skipped), |
| | "total": test_cases, |
| | "passed_count": passed_tests, |
| | "stdout": stdout, |
| | "stderr": stderr |
| | } |
| | |
| | |
| | for failure in test_result.failures: |
| | test_id = failure[0].id() |
| | failure_message = failure[1] |
| | |
| | |
| | import re |
| | expected_match = re.search(r'Expected\s*:(.+)', failure_message) |
| | actual_match = re.search(r'Actual\s*:(.+)', failure_message) |
| | |
| | expected = expected_match.group(1).strip() if expected_match else None |
| | actual = actual_match.group(1).strip() if actual_match else None |
| | |
| | if test_id not in results["tests"]: |
| | results["tests"][test_id] = {} |
| | |
| | results["tests"][test_id].update({ |
| | "passed": False, |
| | "message": failure_message, |
| | "expected": expected, |
| | "actual": actual |
| | }) |
| | |
| | except Exception as e: |
| | |
| | results["all_passed"] = False |
| | results["tests"][test_file.stem] = { |
| | "passed": False, |
| | "error": str(e), |
| | "failures": 1, |
| | "errors": 1, |
| | "skipped": 0, |
| | "total": 1, |
| | "passed_count": 0 |
| | } |
| | results["total_tests"] += 1 |
| | |
| | return results |
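| | # Minimal usage sketch (paths and test names are hypothetical, shown for |
| | # orientation only): |
| | #     runner = DefaultTestRunner() |
| | #     results = runner.run_tests( |
| | #         solution_file=Path("workdir/solution.py"), |
| | #         test_files=[Path("workdir/test_calculate_sum.py")], |
| | #         code_context={"required_functions": ["calculate_sum"]}, |
| | #     ) |
| | #     print(results["passed_tests"], "/", results["total_tests"]) |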
| |
|
| |
|
| | class BugFixingTaskGenerator: |
| | """Generator for bug fixing tasks.""" |
| | |
| | def __init__(self, config: Dict[str, Any] = None): |
| | """ |
| | Initialize the bug fixing task generator. |
| | |
| | Args: |
| | config: Configuration options |
| | """ |
| | self.config = config or {} |
| | self.difficulty_levels = self.config.get( |
| | "difficulty_levels", |
| | ["easy", "medium", "hard", "expert"] |
| | ) |
| | self.bug_categories = self.config.get( |
| | "bug_categories", |
| | [ |
| | BugCategory.SYNTAX, |
| | BugCategory.LOGICAL, |
| | BugCategory.EDGE_CASE, |
| | BugCategory.PERFORMANCE |
| | ] |
| | ) |
| | self.test_templates = self._load_test_templates() |
| | |
| | def generate_task(self, difficulty: str = None, bug_categories: List[str] = None) -> BugFixingTask: |
| | """ |
| | Generate a new bug fixing task. |
| | |
| | Args: |
| | difficulty: The difficulty level (easy, medium, hard, expert) |
| | bug_categories: List of bug categories to include |
| | |
| | Returns: |
| | A new bug fixing task |
| | """ |
| | |
| | if difficulty is None: |
| | difficulty = random.choice(self.difficulty_levels) |
| | |
| | |
| | if bug_categories is None: |
| | num_categories = random.randint(1, 3) |
| | bug_categories = random.sample(self.bug_categories, num_categories) |
| | |
| | |
| | problem_state = self._generate_problem_state(difficulty, bug_categories) |
| | |
| | |
| | task_config = { |
| | "difficulty": difficulty, |
| | "bug_categories": bug_categories, |
| | "convergence_criteria": { |
| | "score_threshold": 0.95, |
| | "min_iterations": 1, |
| | "max_iterations": self.config.get("max_iterations", 5), |
| | "score_delta_threshold": 0.05, |
| | "consecutive_plateau_limit": 2 |
| | }, |
| | "score_weights": { |
| | "test": 0.7, |
| | "execution": 0.3 |
| | }, |
| | "performance_threshold": 1.0, |
| | "complexity_threshold": 0.7 |
| | } |
| | |
| | |
| | return BugFixingTask(problem_state, task_config) |
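| | # Usage sketch (illustrative; category names come from BugCategory above): |
| | #     generator = BugFixingTaskGenerator() |
| | #     task = generator.generate_task( |
| | #         difficulty="medium", |
| | #         bug_categories=[BugCategory.LOGICAL, BugCategory.EDGE_CASE], |
| | #     ) |
| | #     print(task.state.description) |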
| | |
| | def _generate_problem_state(self, difficulty: str, bug_categories: List[str]) -> ProblemState: |
| | """ |
| | Generate a problem state for the given difficulty and bug categories. |
| | |
| | Args: |
| | difficulty: The difficulty level |
| | bug_categories: List of bug categories |
| | |
| | Returns: |
| | A problem state for the task |
| | """ |
| | |
| | template = self._choose_template(difficulty, bug_categories) |
| | |
| | |
| | problem_state = copy.deepcopy(template) |
| | |
| | |
| | problem_state.problem_id = str(uuid.uuid4()) |
| | |
| | |
| | problem_state.evolution_stage = 0 |
| | problem_state.adaptation_vector = [0.0] * 5 |
| | |
| | |
| | difficulty_values = { |
| | "easy": 0.25, |
| | "medium": 0.5, |
| | "hard": 0.75, |
| | "expert": 0.9 |
| | } |
| | problem_state.difficulty = difficulty_values.get(difficulty, 0.5) |
| | |
| | |
| | for category in bug_categories: |
| | self._insert_bug(problem_state, category) |
| | |
| | |
| | problem_state.description = self._generate_description(problem_state) |
| | |
| | return problem_state |
| | |
| | def _choose_template(self, difficulty: str, bug_categories: List[str]) -> ProblemState: |
| | """ |
| | Choose a template that matches the difficulty and bug categories. |
| | |
| | Args: |
| | difficulty: The difficulty level |
| | bug_categories: List of bug categories |
| | |
| | Returns: |
| | A template problem state |
| | """ |
| | |
| | |
| | |
| | |
| | code = self._generate_template_code(difficulty, bug_categories) |
| | tests = self._generate_template_tests(code) |
| | |
| | |
| | return ProblemState( |
| | problem_id="template", |
| | description="Fix the bugs in the given code.", |
| | code_context={ |
| | "code": code, |
| | "tests": tests, |
| | "bug_count": 0, |
| | "bug_categories": [] |
| | }, |
| | requirements=[ |
| | { |
| | "type": "functional", |
| | "description": "The code should pass all the provided tests.", |
| | "difficulty": 0.3 |
| | } |
| | ], |
| | difficulty=0.5, |
| | evolution_stage=0, |
| | adaptation_vector=[0.0] * 5 |
| | ) |
| | |
| | def _generate_template_code(self, difficulty: str, bug_categories: List[str]) -> str: |
| | """ |
| | Generate template code based on difficulty and bug categories. |
| | |
| | Args: |
| | difficulty: The difficulty level |
| | bug_categories: List of bug categories |
| | |
| | Returns: |
| | Template code |
| | """ |
| | |
| | templates = { |
| | "easy": """ |
| | def calculate_sum(numbers): |
| | \"\"\"Calculate the sum of a list of numbers.\"\"\" |
| | total = 0 |
| | for num in numbers: |
| | total += num |
| | return total |
| | |
| | def calculate_average(numbers): |
| | \"\"\"Calculate the average of a list of numbers.\"\"\" |
| | if not numbers: |
| | return 0 |
| | return calculate_sum(numbers) / len(numbers) |
| | """, |
| | "medium": """ |
| | def find_most_frequent(items): |
| | \"\"\"Find the most frequently occurring item in a list.\"\"\" |
| | if not items: |
| | return None |
| | |
| | counts = {} |
| | for item in items: |
| | if item in counts: |
| | counts[item] += 1 |
| | else: |
| | counts[item] = 1 |
| | |
| | max_count = 0 |
| | max_item = None |
| | for item, count in counts.items(): |
| | if count > max_count: |
| | max_count = count |
| | max_item = item |
| | |
| | return max_item |
| | |
| | def binary_search(sorted_list, target): |
| | """Perform binary search on a sorted list.""" |
| | left = 0 |
| | right = len(sorted_list) - 1 |
| | |
| | while left <= right: |
| | mid = (left + right) // 2 |
| | if sorted_list[mid] == target: |
| | return mid |
| | elif sorted_list[mid] < target: |
| | left = mid + 1 |
| | else: |
| | right = mid - 1 |
| | |
| | return -1 # Target not found |
| | """, |
| | "hard": """ |
| | def merge_sort(arr): |
| | """Sort an array using the merge sort algorithm.""" |
| | if len(arr) <= 1: |
| | return arr |
| | |
| | # Split the array into two halves |
| | mid = len(arr) // 2 |
| | left_half = arr[:mid] |
| | right_half = arr[mid:] |
| | |
| | # Recursively sort both halves |
| | left_half = merge_sort(left_half) |
| | right_half = merge_sort(right_half) |
| | |
| | # Merge the sorted halves |
| | return merge(left_half, right_half) |
| | |
| | def merge(left, right): |
| | """Merge two sorted arrays.""" |
| | result = [] |
| | i = j = 0 |
| | |
| | # Compare elements from both arrays and add the smaller one to the result |
| | while i < len(left) and j < len(right): |
| | if left[i] <= right[j]: |
| | result.append(left[i]) |
| | i += 1 |
| | else: |
| | result.append(right[j]) |
| | j += 1 |
| | |
| | # Add any remaining elements |
| | result.extend(left[i:]) |
| | result.extend(right[j:]) |
| | |
| | return result |
| | |
| | def quicksort(arr): |
| | """Sort an array using the quicksort algorithm.""" |
| | if len(arr) <= 1: |
| | return arr |
| | |
| | # Choose the pivot (using the first element for simplicity) |
| | pivot = arr[0] |
| | |
| | # Partition the array |
| | less = [x for x in arr[1:] if x <= pivot] |
| | greater = [x for x in arr[1:] if x > pivot] |
| | |
| | # Recursively sort the partitions and combine |
| | return quicksort(less) + [pivot] + quicksort(greater) |
| | """, |
| | "expert": """ |
| | class Node: |
| | """Node in a binary tree.""" |
| | def __init__(self, value): |
| | self.value = value |
| | self.left = None |
| | self.right = None |
| | |
| | def build_binary_tree(values): |
| | """Build a binary tree from a list of values.""" |
| | if not values: |
| | return None |
| | |
| | root = Node(values[0]) |
| | queue = [root] |
| | i = 1 |
| | |
| | while queue and i < len(values): |
| | node = queue.pop(0) |
| | |
| | # Add left child |
| | if i < len(values) and values[i] is not None: |
| | node.left = Node(values[i]) |
| | queue.append(node.left) |
| | i += 1 |
| | |
| | # Add right child |
| | if i < len(values) and values[i] is not None: |
| | node.right = Node(values[i]) |
| | queue.append(node.right) |
| | i += 1 |
| | |
| | return root |
| | |
| | def is_balanced(root): |
| | """Check if a binary tree is balanced.""" |
| | def height(node): |
| | if not node: |
| | return 0 |
| | return max(height(node.left), height(node.right)) + 1 |
| | |
| | def is_balanced_helper(node): |
| | if not node: |
| | return True |
| | |
| | left_height = height(node.left) |
| | right_height = height(node.right) |
| | |
| | if abs(left_height - right_height) > 1: |
| | return False |
| | |
| | return is_balanced_helper(node.left) and is_balanced_helper(node.right) |
| | |
| | return is_balanced_helper(root) |
| | |
| | def find_lca(root, p, q): |
| | """Find the lowest common ancestor of two nodes in a binary tree.""" |
| | if not root: |
| | return None |
| | |
| | if root.value == p or root.value == q: |
| | return root |
| | |
| | left_lca = find_lca(root.left, p, q) |
| | right_lca = find_lca(root.right, p, q) |
| | |
| | if left_lca and right_lca: |
| | return root |
| | |
| | return left_lca if left_lca else right_lca |
| | """ |
| | } |
| | |
| | |
| | if difficulty in templates: |
| | return templates[difficulty] |
| | else: |
| | return templates["medium"] |
| | |
| | def _generate_template_tests(self, code: str) -> List[Dict[str, Any]]: |
| | """ |
| | Generate template tests based on the code. |
| | |
| | Args: |
| | code: The template code |
| | |
| | Returns: |
| | List of test dictionaries |
| | """ |
| | |
| | function_names = re.findall(r'def\s+(\w+)', code) |
| | |
| | |
| | tests = [] |
| | for func_name in function_names: |
| | test_content = self._generate_test_for_function(func_name) |
| | if test_content: |
| | tests.append({ |
| | "name": f"test_{func_name}", |
| | "content": test_content, |
| | "description": f"Test for {func_name} function" |
| | }) |
| | |
| | return tests |
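| | # Sketch of the returned structure for the "easy" template (illustrative): |
| | #     [{"name": "test_calculate_sum", "content": "<unittest script>", |
| | #       "description": "Test for calculate_sum function"}, |
| | #      {"name": "test_calculate_average", ...}] |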
| | |
| | def _generate_test_for_function(self, func_name: str) -> str: |
| | """ |
| | Generate a test for a specific function. |
| | |
| | Args: |
| | func_name: The name of the function to test |
| | |
| | Returns: |
| | Test content |
| | """ |
| | |
| | if func_name in self.test_templates: |
| | return self.test_templates[func_name] |
| | |
| | |
| | if "sum" in func_name.lower(): |
| | return """ |
| | import unittest |
| | import sys |
| | import os |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import calculate_sum |
| | |
| | class TestCalculateSum(unittest.TestCase): |
| | def test_calculate_sum(self): |
| | self.assertEqual(calculate_sum([1, 2, 3, 4, 5]), 15) |
| | self.assertEqual(calculate_sum([]), 0) |
| | self.assertEqual(calculate_sum([-1, -2, -3]), -6) |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | elif "average" in func_name.lower(): |
| | return """ |
| | import unittest |
| | import sys |
| | import os |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import calculate_average |
| | |
| | class TestCalculateAverage(unittest.TestCase): |
| | def test_calculate_average(self): |
| | self.assertEqual(calculate_average([1, 2, 3, 4, 5]), 3) |
| | self.assertEqual(calculate_average([]), 0) |
| | self.assertEqual(calculate_average([10]), 10) |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | elif "frequent" in func_name.lower(): |
| | return """ |
| | import unittest |
| | import sys |
| | import os |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import find_most_frequent |
| | |
| | class TestFindMostFrequent(unittest.TestCase): |
| | def test_find_most_frequent(self): |
| | self.assertEqual(find_most_frequent([1, 2, 2, 3, 3, 3, 4]), 3) |
| | self.assertEqual(find_most_frequent(['a', 'b', 'a', 'c', 'a']), 'a') |
| | self.assertIsNone(find_most_frequent([])) |
| | self.assertEqual(find_most_frequent([5]), 5) |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | elif "search" in func_name.lower(): |
| | return """ |
| | import unittest |
| | import sys |
| | import os |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import binary_search |
| | |
| | class TestBinarySearch(unittest.TestCase): |
| | def test_binary_search(self): |
| | self.assertEqual(binary_search([1, 2, 3, 4, 5], 3), 2) |
| | self.assertEqual(binary_search([1, 2, 3, 4, 5], 1), 0) |
| | self.assertEqual(binary_search([1, 2, 3, 4, 5], 5), 4) |
| | self.assertEqual(binary_search([1, 2, 3, 4, 5], 6), -1) |
| | self.assertEqual(binary_search([], 5), -1) |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | elif "sort" in func_name.lower(): |
| | return """ |
| | import unittest |
| | import sys |
| | import os |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import {0} |
| | |
| | class Test{1}(unittest.TestCase): |
| | def test_sorting(self): |
| | self.assertEqual({0}([]), []) |
| | self.assertEqual({0}([1]), [1]) |
| | self.assertEqual({0}([3, 1, 4, 1, 5, 9, 2, 6, 5]), [1, 1, 2, 3, 4, 5, 5, 6, 9]) |
| | self.assertEqual({0}([9, 8, 7, 6, 5, 4, 3, 2, 1]), [1, 2, 3, 4, 5, 6, 7, 8, 9]) |
| | self.assertEqual({0}([1, 1, 1, 1]), [1, 1, 1, 1]) |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """.format(func_name, func_name.title()) |
| | elif "balanced" in func_name.lower(): |
| | return """ |
| | import unittest |
| | import sys |
| | import os |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import Node, is_balanced |
| | |
| | class TestIsBalanced(unittest.TestCase): |
| | def test_is_balanced(self): |
| | # Create a balanced tree |
| | # 1 |
| | # / \\ |
| | # 2 3 |
| | # / \\ / \\ |
| | # 4 5 6 7 |
| | root = Node(1) |
| | root.left = Node(2) |
| | root.right = Node(3) |
| | root.left.left = Node(4) |
| | root.left.right = Node(5) |
| | root.right.left = Node(6) |
| | root.right.right = Node(7) |
| | self.assertTrue(is_balanced(root)) |
| | |
| | # Create an unbalanced tree |
| | # 1 |
| | # / \\ |
| | # 2 3 |
| | # / \\ |
| | # 4 5 |
| | #/ |
| | #6 |
| | root = Node(1) |
| | root.left = Node(2) |
| | root.right = Node(3) |
| | root.left.left = Node(4) |
| | root.left.right = Node(5) |
| | root.left.left.left = Node(6) |
| | self.assertFalse(is_balanced(root)) |
| | |
| | # Empty tree is balanced |
| | self.assertTrue(is_balanced(None)) |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | elif "lca" in func_name.lower(): |
| | return """ |
| | import unittest |
| | import sys |
| | import os |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import Node, find_lca |
| | |
| | class TestFindLCA(unittest.TestCase): |
| | def test_find_lca(self): |
| | # Create a tree |
| | # 1 |
| | # / \\ |
| | # 2 3 |
| | # / \\ / \\ |
| | # 4 5 6 7 |
| | root = Node(1) |
| | root.left = Node(2) |
| | root.right = Node(3) |
| | root.left.left = Node(4) |
| | root.left.right = Node(5) |
| | root.right.left = Node(6) |
| | root.right.right = Node(7) |
| | |
| | # Test cases |
| | self.assertEqual(find_lca(root, 4, 5).value, 2) # LCA of 4 and 5 is 2 |
| | self.assertEqual(find_lca(root, 4, 6).value, 1) # LCA of 4 and 6 is 1 |
| | self.assertEqual(find_lca(root, 3, 7).value, 3) # LCA of 3 and 7 is 3 |
| | self.assertEqual(find_lca(root, 2, 7).value, 1) # LCA of 2 and 7 is 1 |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | elif "tree" in func_name.lower(): |
| | return """ |
| | import unittest |
| | import sys |
| | import os |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import Node, build_binary_tree |
| | |
| | class TestBuildBinaryTree(unittest.TestCase): |
| | def test_build_binary_tree(self): |
| | # Test empty list |
| | self.assertIsNone(build_binary_tree([])) |
| | |
| | # Test single node |
| | root = build_binary_tree([1]) |
| | self.assertEqual(root.value, 1) |
| | self.assertIsNone(root.left) |
| | self.assertIsNone(root.right) |
| | |
| | # Test complete tree |
| | # 1 |
| | # / \\ |
| | # 2 3 |
| | # / \\ / \\ |
| | # 4 5 6 7 |
| | values = [1, 2, 3, 4, 5, 6, 7] |
| | root = build_binary_tree(values) |
| | self.assertEqual(root.value, 1) |
| | self.assertEqual(root.left.value, 2) |
| | self.assertEqual(root.right.value, 3) |
| | self.assertEqual(root.left.left.value, 4) |
| | self.assertEqual(root.left.right.value, 5) |
| | self.assertEqual(root.right.left.value, 6) |
| | self.assertEqual(root.right.right.value, 7) |
| | |
| | # Test tree with None values |
| | # 1 |
| | # / \\ |
| | # 2 3 |
| | # / / |
| | # 4 6 |
| | values = [1, 2, 3, 4, None, 6, None] |
| | root = build_binary_tree(values) |
| | self.assertEqual(root.value, 1) |
| | self.assertEqual(root.left.value, 2) |
| | self.assertEqual(root.right.value, 3) |
| | self.assertEqual(root.left.left.value, 4) |
| | self.assertIsNone(root.left.right) |
| | self.assertEqual(root.right.left.value, 6) |
| | self.assertIsNone(root.right.right) |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | else: |
| | |
| | return """ |
| | import unittest |
| | import sys |
| | import os |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import {0} |
| | |
| | class Test{1}(unittest.TestCase): |
| | def test_{0}(self): |
| | # TODO: Add specific test cases for {0} |
| | # This is a placeholder test |
| | self.assertTrue(True) |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """.format(func_name, func_name.title()) |
| | |
| | def _load_test_templates(self) -> Dict[str, str]: |
| | """ |
| | Load test templates for common functions. |
| | |
| | Returns: |
| | Dictionary of test templates |
| | """ |
| | |
| | return { |
| | "calculate_sum": """ |
| | import unittest |
| | import sys |
| | import os |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import calculate_sum |
| | |
| | class TestCalculateSum(unittest.TestCase): |
| | def test_calculate_sum(self): |
| | self.assertEqual(calculate_sum([1, 2, 3, 4, 5]), 15) |
| | self.assertEqual(calculate_sum([]), 0) |
| | self.assertEqual(calculate_sum([-1, -2, -3]), -6) |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """, |
| | "calculate_average": """ |
| | import unittest |
| | import sys |
| | import os |
| | |
| | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| | from solution import calculate_average |
| | |
| | class TestCalculateAverage(unittest.TestCase): |
| | def test_calculate_average(self): |
| | self.assertEqual(calculate_average([1, 2, 3, 4, 5]), 3) |
| | self.assertEqual(calculate_average([]), 0) |
| | self.assertEqual(calculate_average([10]), 10) |
| | |
| | if __name__ == '__main__': |
| | unittest.main() |
| | """ |
| | } |
| | |
| | def _insert_bug(self, problem_state: ProblemState, bug_category: str) -> None: |
| | """ |
| | Insert a bug of the specified category into the problem state. |
| | |
| | Args: |
| | problem_state: The problem state to modify |
| | bug_category: The category of bug to insert |
| | """ |
| | if "code" not in problem_state.code_context: |
| | return |
| | |
| | |
| | code = problem_state.code_context["code"] |
| | try: |
| | parsed_code = ast.parse(code) |
| | except SyntaxError: |
| | |
| | return |
| | |
| | |
| | if bug_category == BugCategory.SYNTAX: |
| | self._insert_syntax_bug(problem_state) |
| | elif bug_category == BugCategory.LOGICAL: |
| | self._insert_logical_bug(problem_state) |
| | elif bug_category == BugCategory.PERFORMANCE: |
| | self._insert_performance_bug(problem_state) |
| | elif bug_category == BugCategory.EDGE_CASE: |
| | self._insert_edge_case_bug(problem_state) |
| | else: |
| | |
| | self._insert_logical_bug(problem_state) |
| | |
| | |
| | if "bug_count" not in problem_state.code_context: |
| | problem_state.code_context["bug_count"] = 0 |
| | problem_state.code_context["bug_count"] += 1 |
| | |
| | if "bug_categories" not in problem_state.code_context: |
| | problem_state.code_context["bug_categories"] = [] |
| | if bug_category not in problem_state.code_context["bug_categories"]: |
| | problem_state.code_context["bug_categories"].append(bug_category) |
| | |
| | def _insert_syntax_bug(self, problem_state: ProblemState) -> None: |
| | """ |
| | Insert a syntax bug into the problem state. |
| | |
| | Args: |
| | problem_state: The problem state to modify |
| | """ |
| | code = problem_state.code_context["code"] |
| | lines = code.split('\n') |
| | if not lines: |
| | return |
| | |
| | |
| | idx = random.randint(0, len(lines) - 1) |
| | line = lines[idx] |
| | |
| | |
| | attempts = 0 |
| | while (not line.strip() or line.strip().startswith('#')) and attempts < 10: |
| | idx = random.randint(0, len(lines) - 1) |
| | line = lines[idx] |
| | attempts += 1 |
| | |
| | if attempts >= 10: |
| | |
| | for i, line in enumerate(lines): |
| | if line.strip() and not line.strip().startswith('#'): |
| | idx = i |
| | break |
| | else: |
| | return |
| | |
| | |
| | mod_type = random.choice([ |
| | "remove_character", |
| | "add_character", |
| | "swap_characters", |
| | "change_indent" |
| | ]) |
| | |
| | if mod_type == "remove_character" and line: |
| | char_idx = random.randint(0, len(line) - 1) |
| | lines[idx] = line[:char_idx] + line[char_idx+1:] |
| | |
| | elif mod_type == "add_character": |
| | char_idx = random.randint(0, len(line)) |
| | char = random.choice(["(", ")", "{", "}", "[", "]", ":", ";", ",", "."]) |
| | lines[idx] = line[:char_idx] + char + line[char_idx:] |
| | |
| | elif mod_type == "swap_characters" and len(line) >= 2: |
| | char_idx = random.randint(0, len(line) - 2) |
| | lines[idx] = (line[:char_idx] + line[char_idx+1] + |
| | line[char_idx] + line[char_idx+2:]) |
| | |
| | elif mod_type == "change_indent": |
| | |
| | if line.startswith(" "): |
| | lines[idx] = line[2:] |
| | else: |
| | lines[idx] = " " + line |
| | |
| | |
| | problem_state.code_context["code"] = '\n'.join(lines) |
| | |
| | |
| | if "bugs" not in problem_state.code_context: |
| | problem_state.code_context["bugs"] = [] |
| | |
| | problem_state.code_context["bugs"].append({ |
| | "type": BugCategory.SYNTAX, |
| | "line": idx + 1, |
| | "description": f"Syntax error introduced in line {idx + 1}" |
| | }) |
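| | # Illustrative mutation (one possible outcome of "remove_character"): |
| | #     before: def calculate_sum(numbers): |
| | #     after:  def calculate_sum(numbers) |
| | # The dropped colon leaves the template with exactly the kind of |
| | # SyntaxError the solver is expected to diagnose and repair. |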
| | |
| | def _insert_logical_bug(self, problem_state: ProblemState) -> None: |
| | """ |
| | Insert a logical bug into the problem state. |
| | |
| | Args: |
| | problem_state: The problem state to modify |
| | """ |
| | code = problem_state.code_context["code"] |
| | lines = code.split('\n') |
| | if not lines: |
| | return |
| | |
| | |
| | if_statements = [] |
| | for i, line in enumerate(lines): |
| | if re.search(r'\bif\b|\bwhile\b|\bfor\b', line): |
| | if_statements.append((i, line)) |
| | |
| | |
| | mod_type = random.choice([ |
| | "change_comparison", |
| | "invert_condition", |
| | "off_by_one", |
| | "change_operator", |
| | "reverse_logic" |
| | ]) |
| | |
| | if if_statements: |
| | |
| | idx, line = random.choice(if_statements) |
| | |
| | if mod_type == "change_comparison": |
| | |
| | comparisons = {"==": "!=", "!=": "==", ">": "<", "<": ">", ">=": "<=", "<=": ">="} |
| | for op, new_op in comparisons.items(): |
| | if op in line: |
| | lines[idx] = line.replace(op, new_op, 1) |
| | break |
| | |
| | elif mod_type == "invert_condition": |
| | |
| | if "not" in line: |
| | lines[idx] = line.replace("not ", "", 1) |
| | else: |
| | match = re.search(r'(if|while)\s+([^:]+):', line) |
| | if match: |
| | condition = match.group(2) |
| | lines[idx] = line.replace(condition, f"not ({condition})", 1) |
| | |
| | elif mod_type == "off_by_one": |
| | |
| | for op in ["+", "-"]: |
| | if op in line: |
| | |
| | match = re.search(f'\\{op}\\s*(\\d+)', line) |
| | if match: |
| | num = int(match.group(1)) |
| | new_num = num + 1 if op == "+" else max(0, num - 1) |
| | lines[idx] = line.replace(f"{op} {num}", f"{op} {new_num}", 1) |
| | break |
| | |
| | elif mod_type == "change_operator": |
| | |
| | operators = {"+": "-", "-": "+", "*": "/", "/": "*", "and": "or", "or": "and"} |
| | for op, new_op in operators.items(): |
| | if f" {op} " in line: |
| | lines[idx] = line.replace(f" {op} ", f" {new_op} ", 1) |
| | break |
| | |
| | elif mod_type == "reverse_logic": |
| | |
| | if " and " in line: |
| | parts = line.split(" and ") |
| | lines[idx] = line.replace(" and ".join(parts), " or ".join(parts), 1) |
| | elif " or " in line: |
| | parts = line.split(" or ") |
| | lines[idx] = line.replace(" or ".join(parts), " and ".join(parts), 1) |
| | |
| | else: |
| | |
| | |
| | assignments = [] |
| | for i, line in enumerate(lines): |
| | if "=" in line and "==" not in line and "!=" not in line: |
| | assignments.append((i, line)) |
| | |
| | if assignments: |
| | |
| | idx, line = random.choice(assignments) |
| | |
| | |
| | if "+" in line: |
| | lines[idx] = line.replace("+", "-", 1) |
| | elif "-" in line: |
| | lines[idx] = line.replace("-", "+", 1) |
| | elif "*" in line: |
| | lines[idx] = line.replace("*", "/", 1) |
| | elif "/" in line: |
| | lines[idx] = line.replace("/", "*", 1) |
| | else: |
| | |
| | match = re.search(r'=\s*(\d+)', line) |
| | if match: |
| | num = int(match.group(1)) |
| | new_num = num + random.choice([-1, 1]) * random.randint(1, 3) |
| | lines[idx] = line.replace(f"= {num}", f"= {new_num}", 1) |
| | |
| | |
| | problem_state.code_context["code"] = '\n'.join(lines) |
| | |
| | |
| | if "bugs" not in problem_state.code_context: |
| | problem_state.code_context["bugs"] = [] |
| | |
| | problem_state.code_context["bugs"].append({ |
| | "type": BugCategory.LOGICAL, |
| | "line": idx + 1, |
| | "description": f"Logical error introduced in line {idx + 1}" |
| | }) |
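| | # Illustrative mutation (one possible outcome of "change_comparison"): |
| | #     before: if sorted_list[mid] < target: |
| | #     after:  if sorted_list[mid] > target: |
| | # The flipped comparison keeps the code syntactically valid but makes |
| | # binary_search walk the wrong half of the list. |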
| | |
| | def _insert_performance_bug(self, problem_state: ProblemState) -> None: |
| | """ |
| | Insert a performance bug into the problem state. |
| | |
| | Args: |
| | problem_state: The problem state to modify |
| | """ |
| | code = problem_state.code_context["code"] |
| | lines = code.split('\n') |
| | if not lines: |
| | return |
| | |
| | |
| | functions = [] |
| | current_func = None |
| | func_start = None |
| | for i, line in enumerate(lines): |
| | if line.strip().startswith("def "): |
| | if current_func: |
| | functions.append((func_start, i - 1, current_func)) |
| | current_func = line.strip()[4:].split("(")[0] |
| | func_start = i |
| | elif i == len(lines) - 1 and current_func: |
| | functions.append((func_start, i, current_func)) |
| | |
| | if not functions: |
| | return |
| | |
| | |
| | start_idx, end_idx, func_name = random.choice(functions) |
| | |
| | |
| | mod_type = random.choice([ |
| | "add_nested_loop", |
| | "inefficient_data_structure", |
| | "redundant_computation" |
| | ]) |
| | |
| | if mod_type == "add_nested_loop": |
| | |
| | for i in range(start_idx + 1, end_idx + 1): |
| | if lines[i].strip(): |
| | indent = len(lines[i]) - len(lines[i].lstrip()) |
| | break |
| | else: |
| | indent = 4 |
| | |
| | |
| | for i in range(start_idx + 1, end_idx + 1): |
| | if "for " in lines[i] or "while " in lines[i]: |
| | |
| | inner_indent = len(lines[i]) - len(lines[i].lstrip()) + 4 |
| | inner_indent_str = ' ' * inner_indent |
| | |
| | |
| | lines.insert(i + 1, f"{inner_indent_str}for _ in range(100): # Inefficient nested loop") |
| | lines.insert(i + 2, f"{inner_indent_str} pass") |
| | |
| | |
| | end_idx += 2 |
| | break |
| | else: |
| | |
| | inner_indent = indent + 4 |
| | inner_indent_str = ' ' * inner_indent |
| | |
| | |
| | for i in range(start_idx + 1, end_idx + 1): |
| | if lines[i].strip() and not (lines[i].strip().startswith('"""') or lines[i].strip().startswith("'''")): |
| | |
| | lines.insert(i, f"{' ' * indent}for i in range(100): # Inefficient loop") |
| | lines.insert(i + 1, f"{inner_indent_str}pass") |
| | |
| | |
| | end_idx += 2 |
| | break |
| | |
| | elif mod_type == "ineff |
| | # recursive_swe_bench/task_generators/bug_fixing.py (finalized) |
| | |
| | elif mod_type == "inefficient_data_structure": |
| | # Find indentation of the function |
| | for i in range(start_idx + 1, end_idx + 1): |
| | if lines[i].strip(): |
| | indent = len(lines[i]) - len(lines[i].lstrip()) |
| | break |
| | else: |
| | indent = 4 |
| | |
| | # Find a suitable place to add inefficient data structure usage |
| | for i in range(start_idx + 1, end_idx + 1): |
| | if "def " not in lines[i] and lines[i].strip(): |
| | # Add inefficient data structure usage after this line |
| | indent_str = ' ' * indent |
| | |
| | # Add inefficient code |
| | lines.insert(i + 1, f"{indent_str} |
| | lines.insert(i + 2, f"{indent_str}results = []") |
| | lines.insert(i + 3, f"{indent_str}for i in range(1000): # Unnecessarily large range") |
| | lines.insert(i + 4, f"{indent_str} # Using list instead of set for lookups") |
| | lines.insert(i + 5, f"{indent_str} if i % 10 in results: # O(n) lookup instead of O(1)") |
| | lines.insert(i + 6, f"{indent_str} results.append(i) # Unnecessary storage") |
| | |
| | |
| | end_idx += 6 |
| | break |
| | |
| | elif mod_type == "redundant_computation": |
| | |
| | for i in range(start_idx + 1, end_idx + 1): |
| | if lines[i].strip(): |
| | indent = len(lines[i]) - len(lines[i].lstrip()) |
| | break |
| | else: |
| | indent = 4 |
| | |
| | |
| | for i in range(start_idx + 1, end_idx + 1): |
| | if "for " in lines[i] or "while " in lines[i]: |
| | |
| | inner_indent = len(lines[i]) - len(lines[i].lstrip()) + 4 |
| | inner_indent_str = ' ' * inner_indent |
| | |
| | |
| | lines.insert(i + 1, f"{inner_indent_str}# Redundant computation in each iteration") |
| | lines.insert(i + 2, f"{inner_indent_str}temp_sum = 0") |
| | lines.insert(i + 3, f"{inner_indent_str}for j in range(100): # Unnecessary nested computation") |
| | lines.insert(i + 4, f"{inner_indent_str} temp_sum += j") |
| | |
| | |
| | end_idx += 4 |
| | break |
| | |
| | |
| | problem_state.code_context["code"] = '\n'.join(lines) |
| | |
| | |
| | if "bugs" not in problem_state.code_context: |
| | problem_state.code_context["bugs"] = [] |
| | |
| | problem_state.code_context["bugs"].append({ |
| | "type": BugCategory.PERFORMANCE, |
| | "line": start_idx + 1, |
| | "description": f"Performance issue introduced in function '{func_name}'" |
| | }) |
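| | # Illustrative mutation (one possible outcome of "add_nested_loop"): a |
| | #     for _ in range(100):  # Inefficient nested loop |
| | #         pass |
| | # body is injected under the first loop found, inflating the constant |
| | # factor without changing the function's observable results. |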
| | |
| | def _insert_edge_case_bug(self, problem_state: ProblemState) -> None: |
| | """ |
| | Insert an edge case bug into the problem state. |
| | |
| | Args: |
| | problem_state: The problem state to modify |
| | """ |
| | code = problem_state.code_context["code"] |
| | lines = code.split('\n') |
| | if not lines: |
| | return |
| | |
| | |
| | functions = [] |
| | current_func = None |
| | func_start = None |
| | for i, line in enumerate(lines): |
| | if line.strip().startswith("def "): |
| | if current_func: |
| | functions.append((func_start, i - 1, current_func)) |
| | current_func = line.strip()[4:].split("(")[0] |
| | func_start = i |
| | elif i == len(lines) - 1 and current_func: |
| | functions.append((func_start, i, current_func)) |
| | |
| | if not functions: |
| | return |
| | |
| | |
| | start_idx, end_idx, func_name = random.choice(functions) |
| | |
| | |
| | mod_type = random.choice([ |
| | "remove_boundary_check", |
| | "missing_edge_case", |
| | "type_assumption" |
| | ]) |
| | |
| | if mod_type == "remove_boundary_check": |
| | |
| | boundary_checks = [] |
| | for i in range(start_idx + 1, end_idx + 1): |
| | if (re.search(r'if\s+.*(len|empty|<=|>=|<|>|==|!=)', lines[i]) and |
| | (("if not " in lines[i]) or ("if len(" in lines[i]) or |
| | ("if " in lines[i] and " == 0" in lines[i]) or |
| | ("if " in lines[i] and " == []" in lines[i]) or |
| | ("if " in lines[i] and " == ''" in lines[i]) or |
| | ("if " in lines[i] and " is None" in lines[i]))): |
| | boundary_checks.append(i) |
| | |
| | if boundary_checks: |
| | |
| | idx = random.choice(boundary_checks) |
| | |
| | |
| | lines[idx] = f"# {lines[idx]} # Boundary check removed" |
| | |
| | |
| | i = idx + 1 |
| | while i <= end_idx and (not lines[i].strip() or len(lines[i]) - len(lines[i].lstrip()) > len(lines[idx]) - len(lines[idx].lstrip())): |
| | lines[i] = f"# {lines[i]}" |
| | i += 1 |
| | else: |
| | |
| | |
| | for i in range(start_idx + 1, end_idx + 1): |
| | if lines[i].strip() and not (lines[i].strip().startswith('"""') or lines[i].strip().startswith("'''")): |
| | indent = len(lines[i]) - len(lines[i].lstrip()) |
| | indent_str = ' ' * indent |
| | |
| | |
| | lines.insert(i, f"{indent_str}# Missing check for empty input") |
| | lines.insert(i + 1, f"{indent_str}first_item = items[0] # Will fail on empty input") |
| | |
| | |
| | end_idx += 2 |
| | break |
| | |
| | elif mod_type == "missing_edge_case": |
| | |
| | for i in range(start_idx + 1, end_idx + 1): |
| | if ("/" in lines[i] or |
| | "if " in lines[i] and "==" in lines[i] or |
| | "if " in lines[i] and "!=" in lines[i]): |
| | |
| | if "/" in lines[i] and not re.search(r'if\s+.*!=\s*0', lines[i-1]): |
| | |
| | indent = len(lines[i]) - len(lines[i].lstrip()) |
| | indent_str = ' ' * indent |
| | |
| | |
| | match = re.search(r'/\s*(\w+)', lines[i]) |
| | if match: |
| | denominator = match.group(1) |
| | |
| | |
| | j = i - 1 |
| | while j >= start_idx and len(lines[j]) - len(lines[j].lstrip()) >= indent: |
| | if f"if {denominator}" in lines[j] and "== 0" in lines[j]: |
| | lines[j] = f"# {lines[j]} # Zero division check removed" |
| | j -= 1 |
| | |
| | |
| | lines.insert(i, f"{indent_str}# Missing check for zero division") |
| | |
| | |
| | end_idx += 1 |
| | break |
| | |
| | elif ("==" in lines[i] or "!=" in lines[i]) and "None" not in lines[i]: |
| | |
| | lines[i] = f"# {lines[i]} # Edge case check removed" |
| | break |
| | else: |
| | |
| | |
| | for i in range(start_idx + 1, end_idx + 1): |
| | if lines[i].strip() and not (lines[i].strip().startswith('"""') or lines[i].strip().startswith("'''")): |
| | indent = len(lines[i]) - len(lines[i].lstrip()) |
| | indent_str = ' ' * indent |
| | |
| | |
| | lines.insert(i, f"{indent_str}# Missing handling for edge cases") |
| | lines.insert(i + 1, f"{indent_str}# This function doesn't handle special cases properly") |
| | |
| | |
| | end_idx += 2 |
| | break |
| | |
| | elif mod_type == "type_assumption": |
| | |
| | for i in range(start_idx + 1, end_idx + 1): |
| | if re.search(r'for\s+\w+\s+in\s+\w+', lines[i]) or "=" in lines[i] and "[" in lines[i]: |
| | |
| | var_match = re.search(r'for\s+\w+\s+in\s+(\w+)', lines[i]) |
| | if not var_match: |
| | var_match = re.search(r'(\w+)\s*=', lines[i]) |
| | |
| | if var_match: |
| | var_name = var_match.group(1) |
| | indent = len(lines[i]) - len(lines[i].lstrip()) |
| | indent_str = ' ' * indent |
| | |
| | |
| | lines.insert(i + 1, f"{indent_str}# Type assumption: {var_name} is assumed to be a list") |
| | lines.insert(i + 2, f"{indent_str}if len({var_name}) > 0: # Will fail if {var_name} doesn't support len()") |
| | lines.insert(i + 3, f"{indent_str} first = {var_name}[0] # Will fail if {var_name} is not subscriptable") |
| | |
| | |
| | end_idx += 3 |
| | break |
| | else: |
| | |
| | for i in range(start_idx + 1, end_idx + 1): |
| | if lines[i].strip() and not (lines[i].strip().startswith('"""') or lines[i].strip().startswith("'''")): |
| | indent = len(lines[i]) - len(lines[i].lstrip()) |
| | indent_str = ' ' * indent |
| | |
| | |
| | param_match = re.search(r'def\s+\w+\s*\(\s*(\w+)', lines[start_idx]) |
| | param_name = param_match.group(1) if param_match else "input_data" |
| | |
| | |
| | lines.insert(i, f"{indent_str}# Type assumption: {param_name} is assumed to be a specific type") |
| | lines.insert(i + 1, f"{indent_str}{param_name}_str = str({param_name}) # Will fail if {param_name} can't be converted to string") |
| | |
| | |
| | end_idx += 2 |
| | break |
| | |
| | |
| | problem_state.code_context["code"] = '\n'.join(lines) |
| | |
| | |
| | if "bugs" not in problem_state.code_context: |
| | problem_state.code_context["bugs"] = [] |
| | |
| | problem_state.code_context["bugs"].append({ |
| | "type": BugCategory.EDGE_CASE, |
| | "line": start_idx + 1, |
| | "description": f"Edge case bug introduced in function '{func_name}'" |
| | }) |
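| | # Illustrative mutation (one possible outcome of "remove_boundary_check"): |
| | #     before: if not items: |
| | #                 return None |
| | #     after:  # if not items: # Boundary check removed |
| | #             #     return None |
| | # Commenting out the guard reintroduces a crash on empty input. |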
| | |
| | def _generate_description(self, problem_state: ProblemState) -> str: |
| | """ |
| | Generate a description for the current problem state. |
| | |
| | Args: |
| | problem_state: The problem state |
| | |
| | Returns: |
| | A descriptive prompt for the problem |
| | """ |
| | |
| | # Summarize how many bugs the solver should expect to find
| | bug_count = problem_state.code_context.get("bug_count", 0)
| | plural = "bugs" if bug_count != 1 else "bug" |
| | |
| | base_desc = ( |
| | f"Fix the {plural} in the code below. " |
| | f"There {'are' if bug_count != 1 else 'is'} {bug_count} {plural} to find and fix." |
| | ) |
| | |
| | |
| | if "bug_categories" in problem_state.code_context: |
| | categories = problem_state.code_context["bug_categories"] |
| | if categories: |
| | category_desc = ", ".join(categories) |
| | base_desc += f"\n\nThe code contains the following types of issues: {category_desc}." |
| | |
| | |
| | # Append any explicit requirements as a numbered list
| | if problem_state.requirements:
| | base_desc += "\n\nRequirements:" |
| | for i, req in enumerate(problem_state.requirements): |
| | base_desc += f"\n{i+1}. {req['description']}" |
| | |
| | |
| | difficulty_desc = "easy" |
| | if problem_state.difficulty > 0.3 and problem_state.difficulty <= 0.6: |
| | difficulty_desc = "moderate" |
| | elif problem_state.difficulty > 0.6 and problem_state.difficulty <= 0.8: |
| | difficulty_desc = "challenging" |
| | elif problem_state.difficulty > 0.8: |
| | difficulty_desc = "very challenging" |
| | |
| | base_desc += f"\n\nThis is a {difficulty_desc} bug fixing task." |
| | |
| | return base_desc |
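| | 
| | # Example output (illustrative, assuming a hypothetical state with
| | # bug_count=2, bug_categories=["logical", "edge_case"], no requirements,
| | # and difficulty=0.65):
| | #
| | #     Fix the bugs in the code below. There are 2 bugs to find and fix.
| | #
| | #     The code contains the following types of issues: logical, edge_case.
| | #
| | #     This is a challenging bug fixing task.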
| |
|
| |
|
| | 
| | # Standard-library imports required by DefaultTestRunner below
| | import importlib.util
| | import io
| | import sys
| | import time
| | import unittest
| | from contextlib import redirect_stdout, redirect_stderr
| | 
| | class DefaultTestRunner: |
| | """ |
| | Default test runner for evaluating solutions. |
| | |
| | This class runs tests against a solution file and collects the results. |
| | """ |
| | |
| | def run_tests( |
| | self, |
| | solution_file: Path, |
| | test_files: List[Path], |
| | code_context: Dict[str, Any] |
| | ) -> Dict[str, Any]: |
| | """ |
| | Run tests against a solution file. |
| | |
| | Args: |
| | solution_file: Path to the solution file |
| | test_files: List of test file paths |
| | code_context: Additional context about the code |
| | |
| | Returns: |
| | Dictionary containing test results |
| | """ |
| | |
| | results = { |
| | "all_passed": True, |
| | "passed_tests": 0, |
| | "total_tests": 0, |
| | "tests": {}, |
| | "execution": { |
| | "success": True, |
| | "error": None, |
| | "stdout": "", |
| | "stderr": "" |
| | }, |
| | "execution_time": 0.0 |
| | } |
| | |
| | |
| | # Bail out early if the solution file is missing
| | if not solution_file.exists():
| | results["execution"]["success"] = False |
| | results["execution"]["error"] = f"Solution file not found: {solution_file}" |
| | results["all_passed"] = False |
| | return results |
| | |
| | |
| | try:
| | start_time = time.time()
| | 
| | # Make the solution importable while it is loaded
| | sys.path.insert(0, str(solution_file.parent))
| | 
| | # Import the solution as a module so syntax and import errors surface here
| | try:
| | spec = importlib.util.spec_from_file_location(
| | "solution", solution_file)
| | solution_module = importlib.util.module_from_spec(spec)
| | spec.loader.exec_module(solution_module)
| | finally:
| | # Restore sys.path even if the import fails
| | sys.path.pop(0)
| | 
| | # Record how long the import took
| | results["execution_time"] = time.time() - start_time
| | 
| | except Exception as e:
| | results["execution"]["success"] = False
| | results["execution"]["error"] = str(e)
| | results["all_passed"] = False
| | return results
| | |
| | |
| | # Run each test file in turn and accumulate the results
| | for test_file in test_files:
| | |
| | if not test_file.exists(): |
| | continue |
| | |
| | try: |
| | |
| | loader = unittest.TestLoader() |
| | |
| | |
| | # Make the test module importable; restored in the finally below
| | sys.path.insert(0, str(test_file.parent))
| | 
| | try:
| | |
| | |
| | # Capture test output so it can be attached to the results
| | stdout_buffer = io.StringIO()
| | stderr_buffer = io.StringIO() |
| | |
| | |
| | # Discover only this specific test file
| | test_suite = loader.discover(
| | str(test_file.parent), |
| | pattern=test_file.name |
| | ) |
| | |
| | |
| | # Count the individual test cases in the suite
| | test_count = 0
| | for suite in test_suite: |
| | for test_case in suite: |
| | test_count += test_case.countTestCases() |
| | |
| | results["total_tests"] += test_count |
| | |
| | |
| | # Run the suite while capturing its stdout/stderr
| | with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
| | test_runner = unittest.TextTestRunner(verbosity=2) |
| | test_result = test_runner.run(test_suite) |
| | |
| | |
| | stdout = stdout_buffer.getvalue() |
| | stderr = stderr_buffer.getvalue() |
| | |
| | 
| | finally:
| | # Restore sys.path even if discovery or the test run raises
| | sys.path.pop(0)
| | |
| | |
| | if not test_result.wasSuccessful(): |
| | results["all_passed"] = False |
| | |
| | |
| | # Tests that neither failed nor errored are counted as passed
| | passed_tests = test_count - len(test_result.failures) - len(test_result.errors)
| | results["passed_tests"] += passed_tests |
| | |
| | |
| | # Record per-file results keyed by the test file's stem
| | test_name = test_file.stem
| | results["tests"][test_name] = { |
| | "passed": test_result.wasSuccessful(), |
| | "failures": len(test_result.failures), |
| | "errors": len(test_result.errors), |
| | "skipped": len(test_result.skipped), |
| | "total": test_count, |
| | "passed_count": passed_tests, |
| | "stdout": stdout, |
| | "stderr": stderr |
| | } |
| | |
| | |
| | # Pull expected/actual values out of each failure or error message
| | for failure in test_result.failures + test_result.errors:
| | test_id = failure[0].id().split('.')[-1] |
| | failure_message = failure[1] |
| | |
| | |
| | expected_match = re.search(r'Expected\s*:(.+)', failure_message) |
| | actual_match = re.search(r'Actual\s*:(.+)', failure_message) |
| | |
| | expected = expected_match.group(1).strip() if expected_match else None |
| | actual = actual_match.group(1).strip() if actual_match else None |
| | |
| | if test_id not in results["tests"]: |
| | results["tests"][test_id] = {} |
| | |
| | results["tests"][test_id].update({ |
| | "passed": False, |
| | "message": failure_message, |
| | "expected": expected, |
| | "actual": actual |
| | }) |
| | |
| | except Exception as e: |
| | |
| | results["all_passed"] = False |
| | test_name = test_file.stem |
| | results["tests"][test_name] = { |
| | "passed": False, |
| | "error": str(e), |
| | "failures": 0, |
| | "errors": 1, |
| | "skipped": 0, |
| | "total": 1, |
| | "passed_count": 0 |
| | } |
| | results["total_tests"] += 1 |
| | |
| | return results |
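| | 
| | # Usage sketch (illustrative; the file names and working directory below
| | # are hypothetical, and in the benchmark this runner is normally invoked
| | # via BugFixingTask._run_evaluation rather than called directly):
| | #
| | #     runner = DefaultTestRunner()
| | #     results = runner.run_tests(
| | #         solution_file=Path("workdir/solution.py"),
| | #         test_files=[Path("workdir/test_solution.py")],
| | #         code_context={},
| | #     )
| | #     print(f"{results['passed_tests']}/{results['total_tests']} tests passed")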
| |
|