Spaces:

Souravdanyal
/

code-debug-env

Running

App Files Files Community

code-debug-env / server /tasks /task_hard.py

Souravdanyal

Final complete version - all fixes applied

8485798 2 months ago

raw

history blame

21.5 kB

	# server/tasks/task_hard.py
	# 15 hard tasks: algorithmic bugs + agent must explain what was wrong.
	# Reward is based on test pass rate PLUS explanation quality.

	import random

	HARD_TASKS = [
	{
	"task_id": "hard_001",
	"domain": "sorting algorithm",
	"instructions": (
	"The function implements bubble sort but is broken. "
	"Fix the algorithm AND explain what was wrong in your 'explanation' field. "
	"Explanation must mention: loop range, boundary, or swap logic."
	),
	"buggy_code": """\
	def bubble_sort(arr):
	n = len(arr)
	for i in range(n):
	for j in range(n - i):
	if arr[j] > arr[j + 1]:
	arr[j], arr[j + 1] = arr[j + 1], arr[j]
	return arr
	""",
	"fixed_code": """\
	def bubble_sort(arr):
	n = len(arr)
	for i in range(n):
	for j in range(n - i - 1):
	if arr[j] > arr[j + 1]:
	arr[j], arr[j + 1] = arr[j + 1], arr[j]
	return arr
	""",
	"explanation_keywords": ["boundary", "index", "range", "n - i - 1", "out of bounds", "last element"],
	"test_cases": [
	{"input": [64, 34, 25, 12, 22, 11, 90], "expected": [11, 12, 22, 25, 34, 64, 90]},
	{"input": [5, 1, 4, 2, 8], "expected": [1, 2, 4, 5, 8]},
	{"input": [1], "expected": [1]},
	],
	"test_cases_description": "Bubble sort with correct inner loop boundary (n - i - 1)",
	},
	{
	"task_id": "hard_002",
	"domain": "dynamic programming",
	"instructions": (
	"The function computes the longest increasing subsequence (LIS) length. "
	"Fix the algorithm AND explain what was wrong. "
	"Explanation must mention: initialization, dp transition, or base case."
	),
	"buggy_code": """\
	def lis_length(nums):
	if not nums:
	return 0
	dp = [0] * len(nums)
	for i in range(len(nums)):
	for j in range(i):
	if nums[j] < nums[i]:
	dp[i] = max(dp[i], dp[j] + 1)
	return max(dp)
	""",
	"fixed_code": """\
	def lis_length(nums):
	if not nums:
	return 0
	dp = [1] * len(nums)
	for i in range(len(nums)):
	for j in range(i):
	if nums[j] < nums[i]:
	dp[i] = max(dp[i], dp[j] + 1)
	return max(dp)
	""",
	"explanation_keywords": ["initialization", "base case", "dp[i]", "1", "zero", "initial value"],
	"test_cases": [
	{"input": [10, 9, 2, 5, 3, 7, 101, 18], "expected": 4},
	{"input": [0, 1, 0, 3, 2, 3], "expected": 4},
	{"input": [7, 7, 7, 7], "expected": 1},
	],
	"test_cases_description": "LIS with dp initialized to 1 (not 0)",
	},
	{
	"task_id": "hard_003",
	"domain": "binary search",
	"instructions": (
	"The function does binary search on a sorted list. "
	"Fix the algorithm AND explain what was wrong. "
	"Explanation must mention: mid calculation, overflow, boundary, or infinite loop."
	),
	"buggy_code": """\
	def binary_search(arr, target):
	low, high = 0, len(arr)
	while low < high:
	mid = (low + high) // 2
	if arr[mid] == target:
	return mid
	elif arr[mid] < target:
	low = mid
	else:
	high = mid - 1
	return -1
	""",
	"fixed_code": """\
	def binary_search(arr, target):
	low, high = 0, len(arr) - 1
	while low <= high:
	mid = (low + high) // 2
	if arr[mid] == target:
	return mid
	elif arr[mid] < target:
	low = mid + 1
	else:
	high = mid - 1
	return -1
	""",
	"explanation_keywords": ["high", "len - 1", "low = mid", "infinite loop", "boundary", "off-by-one"],
	"test_cases": [
	{"input": [[1, 3, 5, 7, 9], 7], "expected": 3},
	{"input": [[1, 3, 5, 7, 9], 1], "expected": 0},
	{"input": [[1, 3, 5, 7, 9], 6], "expected": -1},
	],
	"test_cases_description": "Binary search: high = len-1, low = mid+1, while low <= high",
	},
	{
	"task_id": "hard_004",
	"domain": "dynamic programming",
	"instructions": (
	"The function computes the minimum number of coins to make 'amount'. "
	"Fix the algorithm AND explain what was wrong. "
	"Explanation must mention: initialization, infinity, dp table, or base case."
	),
	"buggy_code": """\
	def coin_change(coins, amount):
	dp = [0] * (amount + 1)
	dp[0] = 0
	for i in range(1, amount + 1):
	for coin in coins:
	if coin <= i:
	dp[i] = min(dp[i], dp[i - coin] + 1)
	return dp[amount] if dp[amount] != 0 else -1
	""",
	"fixed_code": """\
	def coin_change(coins, amount):
	dp = [float('inf')] * (amount + 1)
	dp[0] = 0
	for i in range(1, amount + 1):
	for coin in coins:
	if coin <= i:
	dp[i] = min(dp[i], dp[i - coin] + 1)
	return dp[amount] if dp[amount] != float('inf') else -1
	""",
	"explanation_keywords": ["infinity", "inf", "initialization", "0 instead of inf", "unreachable", "base"],
	"test_cases": [
	{"input": [[1, 5, 6, 9], 11], "expected": 2},
	{"input": [[2], 3], "expected": -1},
	{"input": [[1, 2, 5], 11], "expected": 3},
	],
	"test_cases_description": "Coin change DP: initialized to inf, not 0",
	},
	{
	"task_id": "hard_005",
	"domain": "graph algorithm",
	"instructions": (
	"The function checks if a directed graph has a cycle using DFS. "
	"Fix it AND explain what was wrong. "
	"Explanation must mention: visited, recursion stack, back edge, or state."
	),
	"buggy_code": """\
	def has_cycle(graph):
	visited = set()

	def dfs(node):
	visited.add(node)
	for neighbor in graph.get(node, []):
	if neighbor in visited:
	return True
	if dfs(neighbor):
	return True
	return False

	for node in graph:
	if node not in visited:
	if dfs(node):
	return True
	return False
	""",
	"fixed_code": """\
	def has_cycle(graph):
	visited = set()
	rec_stack = set()

	def dfs(node):
	visited.add(node)
	rec_stack.add(node)
	for neighbor in graph.get(node, []):
	if neighbor not in visited:
	if dfs(neighbor):
	return True
	elif neighbor in rec_stack:
	return True
	rec_stack.remove(node)
	return False

	for node in graph:
	if node not in visited:
	if dfs(node):
	return True
	return False
	""",
	"explanation_keywords": ["recursion stack", "rec_stack", "back edge", "visited", "false positive", "path"],
	"test_cases": [
	{"input": {"A": ["B"], "B": ["C"], "C": ["A"]}, "expected": True},
	{"input": {"A": ["B"], "B": ["C"], "C": []}, "expected": False},
	{"input": {"A": ["B", "C"], "B": ["D"], "C": ["D"], "D": []}, "expected": False},
	],
	"test_cases_description": "Cycle detection in directed graph using recursion stack",
	},
	{
	"task_id": "hard_006",
	"domain": "dynamic programming",
	"instructions": (
	"The function computes the maximum subarray sum (Kadane's algorithm). "
	"Fix it AND explain what was wrong. "
	"Explanation must mention: initialization, negative numbers, current_sum, or reset."
	),
	"buggy_code": """\
	def max_subarray(nums):
	max_sum = 0
	current_sum = 0
	for n in nums:
	current_sum = max(n, current_sum + n)
	max_sum = max(max_sum, current_sum)
	return max_sum
	""",
	"fixed_code": """\
	def max_subarray(nums):
	max_sum = nums[0]
	current_sum = nums[0]
	for n in nums[1:]:
	current_sum = max(n, current_sum + n)
	max_sum = max(max_sum, current_sum)
	return max_sum
	""",
	"explanation_keywords": ["initialization", "negative", "nums[0]", "all negative", "zero", "initial"],
	"test_cases": [
	{"input": [-2, 1, -3, 4, -1, 2, 1, -5, 4], "expected": 6},
	{"input": [-1, -2, -3, -4], "expected": -1},
	{"input": [1], "expected": 1},
	],
	"test_cases_description": "Kadane's algorithm handles all-negative arrays",
	},
	{
	"task_id": "hard_007",
	"domain": "string algorithm",
	"instructions": (
	"The function checks if a string has balanced brackets. "
	"Fix it AND explain what was wrong. "
	"Explanation must mention: stack, matching, empty stack, or closing bracket."
	),
	"buggy_code": """\
	def is_balanced(s):
	stack = []
	matching = {')': '(', ']': '[', '}': '{'}
	for ch in s:
	if ch in '([{':
	stack.append(ch)
	elif ch in ')]}':
	if stack and stack[-1] == matching[ch]:
	stack.pop()
	return len(stack) == 0
	""",
	"fixed_code": """\
	def is_balanced(s):
	stack = []
	matching = {')': '(', ']': '[', '}': '{'}
	for ch in s:
	if ch in '([{':
	stack.append(ch)
	elif ch in ')]}':
	if not stack or stack[-1] != matching[ch]:
	return False
	stack.pop()
	return len(stack) == 0
	""",
	"explanation_keywords": ["stack", "empty stack", "mismatch", "not stack", "early return", "closing"],
	"test_cases": [
	{"input": "([{}])", "expected": True},
	{"input": "([)]", "expected": False},
	{"input": "]", "expected": False},
	],
	"test_cases_description": "Balanced brackets: early return False on mismatch or empty stack",
	},
	{
	"task_id": "hard_008",
	"domain": "dynamic programming",
	"instructions": (
	"The function computes the number of ways to climb n stairs (1 or 2 steps at a time). "
	"Fix it AND explain what was wrong. "
	"Explanation must mention: base case, dp, index, or off-by-one."
	),
	"buggy_code": """\
	def climb_stairs(n):
	if n <= 0:
	return 0
	dp = [0] * (n + 1)
	dp[0] = 1
	dp[1] = 1
	for i in range(3, n + 1):
	dp[i] = dp[i - 1] + dp[i - 2]
	return dp[n]
	""",
	"fixed_code": """\
	def climb_stairs(n):
	if n <= 0:
	return 0
	dp = [0] * (n + 1)
	dp[0] = 1
	dp[1] = 1
	for i in range(2, n + 1):
	dp[i] = dp[i - 1] + dp[i - 2]
	return dp[n]
	""",
	"explanation_keywords": ["range", "starts at 3", "range(2", "off-by-one", "dp[2]", "skipped"],
	"test_cases": [
	{"input": 2, "expected": 2},
	{"input": 3, "expected": 3},
	{"input": 5, "expected": 8},
	],
	"test_cases_description": "Climb stairs DP: loop starts at range(2, ...) not range(3, ...)",
	},
	{
	"task_id": "hard_009",
	"domain": "data processing",
	"instructions": (
	"The function implements quicksort. "
	"Fix it AND explain what was wrong. "
	"Explanation must mention: pivot, partition, recursion, or base case."
	),
	"buggy_code": """\
	def quicksort(arr):
	if len(arr) <= 1:
	return arr
	pivot = arr[0]
	left = [x for x in arr if x < pivot]
	right = [x for x in arr if x > pivot]
	return quicksort(left) + [pivot] + quicksort(right)
	""",
	"fixed_code": """\
	def quicksort(arr):
	if len(arr) <= 1:
	return arr
	pivot = arr[0]
	left = [x for x in arr[1:] if x <= pivot]
	right = [x for x in arr[1:] if x > pivot]
	return quicksort(left) + [pivot] + quicksort(right)
	""",
	"explanation_keywords": ["duplicate", "arr[1:]", "pivot included", "equal", "lost", "missing"],
	"test_cases": [
	{"input": [3, 6, 8, 10, 1, 2, 1], "expected": [1, 1, 2, 3, 6, 8, 10]},
	{"input": [5, 5, 5], "expected": [5, 5, 5]},
	{"input": [1], "expected": [1]},
	],
	"test_cases_description": "Quicksort handles duplicates: arr[1:] and x <= pivot",
	},
	{
	"task_id": "hard_010",
	"domain": "graph algorithm",
	"instructions": (
	"The function finds the shortest path length in an unweighted graph using BFS. "
	"Fix it AND explain what was wrong. "
	"Explanation must mention: visited, queue, infinite loop, or distance tracking."
	),
	"buggy_code": """\
	from collections import deque

	def bfs_shortest_path(graph, start, end):
	queue = deque([(start, 0)])
	while queue:
	node, dist = queue.popleft()
	if node == end:
	return dist
	for neighbor in graph.get(node, []):
	queue.append((neighbor, dist + 1))
	return -1
	""",
	"fixed_code": """\
	from collections import deque

	def bfs_shortest_path(graph, start, end):
	visited = set([start])
	queue = deque([(start, 0)])
	while queue:
	node, dist = queue.popleft()
	if node == end:
	return dist
	for neighbor in graph.get(node, []):
	if neighbor not in visited:
	visited.add(neighbor)
	queue.append((neighbor, dist + 1))
	return -1
	""",
	"explanation_keywords": ["visited", "infinite loop", "revisit", "cycle", "set", "already visited"],
	"test_cases": [
	{"input": [{"A": ["B", "C"], "B": ["D"], "C": ["D"], "D": []}, "A", "D"], "expected": 2},
	{"input": [{"A": ["B"], "B": ["A"]}, "A", "B"], "expected": 1},
	{"input": [{"A": ["B"]}, "A", "C"], "expected": -1},
	],
	"test_cases_description": "BFS shortest path with visited set to prevent revisiting",
	},
	{
	"task_id": "hard_011",
	"domain": "dynamic programming",
	"instructions": (
	"The function computes the 0/1 knapsack maximum value. "
	"Fix it AND explain what was wrong. "
	"Explanation must mention: capacity, dp table, iteration order, or overwrite."
	),
	"buggy_code": """\
	def knapsack(weights, values, capacity):
	n = len(weights)
	dp = [0] * (capacity + 1)
	for i in range(n):
	for w in range(weights[i], capacity + 1):
	dp[w] = max(dp[w], dp[w - weights[i]] + values[i])
	return dp[capacity]
	""",
	"fixed_code": """\
	def knapsack(weights, values, capacity):
	n = len(weights)
	dp = [0] * (capacity + 1)
	for i in range(n):
	for w in range(capacity, weights[i] - 1, -1):
	dp[w] = max(dp[w], dp[w - weights[i]] + values[i])
	return dp[capacity]
	""",
	"explanation_keywords": ["reverse", "backward", "overwrite", "0/1", "unbounded", "iteration order", "right to left"],
	"test_cases": [
	{"input": [[2, 3, 4, 5], [3, 4, 5, 6], 5], "expected": 7},
	{"input": [[1, 2, 3], [6, 10, 12], 5], "expected": 22},
	{"input": [[5], [10], 3], "expected": 0},
	],
	"test_cases_description": "0/1 Knapsack: inner loop must go backward to avoid using item twice",
	},
	{
	"task_id": "hard_012",
	"domain": "string algorithm",
	"instructions": (
	"The function finds the length of the longest substring without repeating characters. "
	"Fix it AND explain what was wrong. "
	"Explanation must mention: window, pointer, index, or update."
	),
	"buggy_code": """\
	def length_of_longest_substring(s):
	char_index = {}
	left = 0
	max_len = 0
	for right, ch in enumerate(s):
	if ch in char_index:
	left = char_index[ch] + 1
	char_index[ch] = right
	max_len = max(max_len, right - left + 1)
	return max_len
	""",
	"fixed_code": """\
	def length_of_longest_substring(s):
	char_index = {}
	left = 0
	max_len = 0
	for right, ch in enumerate(s):
	if ch in char_index and char_index[ch] >= left:
	left = char_index[ch] + 1
	char_index[ch] = right
	max_len = max(max_len, right - left + 1)
	return max_len
	""",
	"explanation_keywords": ["left pointer", "stale", "char_index[ch] >= left", "window", "shrink", "old index"],
	"test_cases": [
	{"input": "abcabcbb", "expected": 3},
	{"input": "bbbbb", "expected": 1},
	{"input": "pwwkew", "expected": 3},
	],
	"test_cases_description": "Longest substring without repeating: only update left if char is within current window",
	},
	{
	"task_id": "hard_013",
	"domain": "data processing",
	"instructions": (
	"The function merges overlapping intervals. "
	"Fix it AND explain what was wrong. "
	"Explanation must mention: sort, overlap, merge condition, or end index."
	),
	"buggy_code": """\
	def merge_intervals(intervals):
	if not intervals:
	return []
	intervals.sort(key=lambda x: x[0])
	merged = [intervals[0]]
	for start, end in intervals[1:]:
	if start <= merged[-1][0]:
	merged[-1][1] = max(merged[-1][1], end)
	else:
	merged.append([start, end])
	return merged
	""",
	"fixed_code": """\
	def merge_intervals(intervals):
	if not intervals:
	return []
	intervals.sort(key=lambda x: x[0])
	merged = [intervals[0]]
	for start, end in intervals[1:]:
	if start <= merged[-1][1]:
	merged[-1][1] = max(merged[-1][1], end)
	else:
	merged.append([start, end])
	return merged
	""",
	"explanation_keywords": ["merged[-1][1]", "end", "start", "overlap", "last interval", "index 1 vs 0"],
	"test_cases": [
	{"input": [[[1, 3], [2, 6], [8, 10]]], "expected": [[1, 6], [8, 10]]},
	{"input": [[[1, 4], [4, 5]]], "expected": [[1, 5]]},
	{"input": [[[1, 2]]], "expected": [[1, 2]]},
	],
	"test_cases_description": "Merge intervals: compare start with merged[-1][1] (end), not [0] (start)",
	},
	{
	"task_id": "hard_014",
	"domain": "math",
	"instructions": (
	"The function does integer square root (floor) without using sqrt(). "
	"Fix it AND explain what was wrong. "
	"Explanation must mention: binary search, convergence, mid, or boundary."
	),
	"buggy_code": """\
	def integer_sqrt(n):
	if n < 2:
	return n
	low, high = 1, n
	while low <= high:
	mid = (low + high) // 2
	if mid * mid == n:
	return mid
	elif mid * mid < n:
	low = mid + 1
	else:
	high = mid - 1
	return low
	""",
	"fixed_code": """\
	def integer_sqrt(n):
	if n < 2:
	return n
	low, high = 1, n // 2
	while low <= high:
	mid = (low + high) // 2
	if mid * mid == n:
	return mid
	elif mid * mid < n:
	low = mid + 1
	else:
	high = mid - 1
	return high
	""",
	"explanation_keywords": ["high", "n // 2", "return high", "return low", "floor", "boundary", "last valid"],
	"test_cases": [
	{"input": 16, "expected": 4},
	{"input": 8, "expected": 2},
	{"input": 1, "expected": 1},
	],
	"test_cases_description": "Integer square root: high=n//2, return high (floor result)",
	},
	{
	"task_id": "hard_015",
	"domain": "string algorithm",
	"instructions": (
	"The function implements the Z-algorithm to count pattern occurrences in text. "
	"Fix it AND explain what was wrong. "
	"Explanation must mention: concatenation, Z-array, separator, or index offset."
	),
	"buggy_code": """\
	def count_occurrences(text, pattern):
	concat = pattern + text
	n = len(concat)
	z = [0] * n
	l, r = 0, 0
	for i in range(1, n):
	if i < r:
	z[i] = min(r - i, z[i - l])
	while i + z[i] < n and concat[z[i]] == concat[i + z[i]]:
	z[i] += 1
	if i + z[i] > r:
	l, r = i, i + z[i]
	return sum(1 for i in range(len(pattern), n) if z[i] == len(pattern))
	""",
	"fixed_code": """\
	def count_occurrences(text, pattern):
	concat = pattern + '#' + text
	n = len(concat)
	z = [0] * n
	l, r = 0, 0
	for i in range(1, n):
	if i < r:
	z[i] = min(r - i, z[i - l])
	while i + z[i] < n and concat[z[i]] == concat[i + z[i]]:
	z[i] += 1
	if i + z[i] > r:
	l, r = i, i + z[i]
	p_len = len(pattern)
	return sum(1 for i in range(p_len + 1, n) if z[i] == p_len)
	""",
	"explanation_keywords": ["separator", "#", "without separator", "bleed", "p_len + 1", "offset", "boundary"],
	"test_cases": [
	{"input": ["aabxaabaab", "aab"], "expected": 3},
	{"input": ["hello world", "world"], "expected": 1},
	{"input": ["aaaa", "aa"], "expected": 3},
	],
	"test_cases_description": "Z-algorithm with '#' separator and corrected offset p_len+1",
	},
	]


	def get_random_hard_task() -> dict:
	return random.choice(HARD_TASKS).copy()


	def get_task_by_id(task_id: str) -> dict:
	for t in HARD_TASKS:
	if t["task_id"] == task_id:
	return t.copy()
	return random.choice(HARD_TASKS).copy()