Spaces:
Running
Running
| # server/tasks/task_hard.py | |
| # 15 hard tasks: algorithmic bugs + agent must explain what was wrong. | |
| # Reward is based on test pass rate PLUS explanation quality. | |
| import random | |
| HARD_TASKS = [ | |
| { | |
| "task_id": "hard_001", | |
| "domain": "sorting algorithm", | |
| "instructions": ( | |
| "The function implements bubble sort but is broken. " | |
| "Fix the algorithm AND explain what was wrong in your 'explanation' field. " | |
| "Explanation must mention: loop range, boundary, or swap logic." | |
| ), | |
| "buggy_code": """\ | |
| def bubble_sort(arr): | |
| n = len(arr) | |
| for i in range(n): | |
| for j in range(n - i): | |
| if arr[j] > arr[j + 1]: | |
| arr[j], arr[j + 1] = arr[j + 1], arr[j] | |
| return arr | |
| """, | |
| "fixed_code": """\ | |
| def bubble_sort(arr): | |
| n = len(arr) | |
| for i in range(n): | |
| for j in range(n - i - 1): | |
| if arr[j] > arr[j + 1]: | |
| arr[j], arr[j + 1] = arr[j + 1], arr[j] | |
| return arr | |
| """, | |
| "explanation_keywords": ["boundary", "index", "range", "n - i - 1", "out of bounds", "last element"], | |
| "test_cases": [ | |
| {"input": [64, 34, 25, 12, 22, 11, 90], "expected": [11, 12, 22, 25, 34, 64, 90]}, | |
| {"input": [5, 1, 4, 2, 8], "expected": [1, 2, 4, 5, 8]}, | |
| {"input": [1], "expected": [1]}, | |
| ], | |
| "test_cases_description": "Bubble sort with correct inner loop boundary (n - i - 1)", | |
| }, | |
| { | |
| "task_id": "hard_002", | |
| "domain": "dynamic programming", | |
| "instructions": ( | |
| "The function computes the longest increasing subsequence (LIS) length. " | |
| "Fix the algorithm AND explain what was wrong. " | |
| "Explanation must mention: initialization, dp transition, or base case." | |
| ), | |
| "buggy_code": """\ | |
| def lis_length(nums): | |
| if not nums: | |
| return 0 | |
| dp = [0] * len(nums) | |
| for i in range(len(nums)): | |
| for j in range(i): | |
| if nums[j] < nums[i]: | |
| dp[i] = max(dp[i], dp[j] + 1) | |
| return max(dp) | |
| """, | |
| "fixed_code": """\ | |
| def lis_length(nums): | |
| if not nums: | |
| return 0 | |
| dp = [1] * len(nums) | |
| for i in range(len(nums)): | |
| for j in range(i): | |
| if nums[j] < nums[i]: | |
| dp[i] = max(dp[i], dp[j] + 1) | |
| return max(dp) | |
| """, | |
| "explanation_keywords": ["initialization", "base case", "dp[i]", "1", "zero", "initial value"], | |
| "test_cases": [ | |
| {"input": [10, 9, 2, 5, 3, 7, 101, 18], "expected": 4}, | |
| {"input": [0, 1, 0, 3, 2, 3], "expected": 4}, | |
| {"input": [7, 7, 7, 7], "expected": 1}, | |
| ], | |
| "test_cases_description": "LIS with dp initialized to 1 (not 0)", | |
| }, | |
| { | |
| "task_id": "hard_003", | |
| "domain": "binary search", | |
| "instructions": ( | |
| "The function does binary search on a sorted list. " | |
| "Fix the algorithm AND explain what was wrong. " | |
| "Explanation must mention: mid calculation, overflow, boundary, or infinite loop." | |
| ), | |
| "buggy_code": """\ | |
| def binary_search(arr, target): | |
| low, high = 0, len(arr) | |
| while low < high: | |
| mid = (low + high) // 2 | |
| if arr[mid] == target: | |
| return mid | |
| elif arr[mid] < target: | |
| low = mid | |
| else: | |
| high = mid - 1 | |
| return -1 | |
| """, | |
| "fixed_code": """\ | |
| def binary_search(arr, target): | |
| low, high = 0, len(arr) - 1 | |
| while low <= high: | |
| mid = (low + high) // 2 | |
| if arr[mid] == target: | |
| return mid | |
| elif arr[mid] < target: | |
| low = mid + 1 | |
| else: | |
| high = mid - 1 | |
| return -1 | |
| """, | |
| "explanation_keywords": ["high", "len - 1", "low = mid", "infinite loop", "boundary", "off-by-one"], | |
| "test_cases": [ | |
| {"input": [[1, 3, 5, 7, 9], 7], "expected": 3}, | |
| {"input": [[1, 3, 5, 7, 9], 1], "expected": 0}, | |
| {"input": [[1, 3, 5, 7, 9], 6], "expected": -1}, | |
| ], | |
| "test_cases_description": "Binary search: high = len-1, low = mid+1, while low <= high", | |
| }, | |
| { | |
| "task_id": "hard_004", | |
| "domain": "dynamic programming", | |
| "instructions": ( | |
| "The function computes the minimum number of coins to make 'amount'. " | |
| "Fix the algorithm AND explain what was wrong. " | |
| "Explanation must mention: initialization, infinity, dp table, or base case." | |
| ), | |
| "buggy_code": """\ | |
| def coin_change(coins, amount): | |
| dp = [0] * (amount + 1) | |
| dp[0] = 0 | |
| for i in range(1, amount + 1): | |
| for coin in coins: | |
| if coin <= i: | |
| dp[i] = min(dp[i], dp[i - coin] + 1) | |
| return dp[amount] if dp[amount] != 0 else -1 | |
| """, | |
| "fixed_code": """\ | |
| def coin_change(coins, amount): | |
| dp = [float('inf')] * (amount + 1) | |
| dp[0] = 0 | |
| for i in range(1, amount + 1): | |
| for coin in coins: | |
| if coin <= i: | |
| dp[i] = min(dp[i], dp[i - coin] + 1) | |
| return dp[amount] if dp[amount] != float('inf') else -1 | |
| """, | |
| "explanation_keywords": ["infinity", "inf", "initialization", "0 instead of inf", "unreachable", "base"], | |
| "test_cases": [ | |
| {"input": [[1, 5, 6, 9], 11], "expected": 2}, | |
| {"input": [[2], 3], "expected": -1}, | |
| {"input": [[1, 2, 5], 11], "expected": 3}, | |
| ], | |
| "test_cases_description": "Coin change DP: initialized to inf, not 0", | |
| }, | |
| { | |
| "task_id": "hard_005", | |
| "domain": "graph algorithm", | |
| "instructions": ( | |
| "The function checks if a directed graph has a cycle using DFS. " | |
| "Fix it AND explain what was wrong. " | |
| "Explanation must mention: visited, recursion stack, back edge, or state." | |
| ), | |
| "buggy_code": """\ | |
| def has_cycle(graph): | |
| visited = set() | |
| def dfs(node): | |
| visited.add(node) | |
| for neighbor in graph.get(node, []): | |
| if neighbor in visited: | |
| return True | |
| if dfs(neighbor): | |
| return True | |
| return False | |
| for node in graph: | |
| if node not in visited: | |
| if dfs(node): | |
| return True | |
| return False | |
| """, | |
| "fixed_code": """\ | |
| def has_cycle(graph): | |
| visited = set() | |
| rec_stack = set() | |
| def dfs(node): | |
| visited.add(node) | |
| rec_stack.add(node) | |
| for neighbor in graph.get(node, []): | |
| if neighbor not in visited: | |
| if dfs(neighbor): | |
| return True | |
| elif neighbor in rec_stack: | |
| return True | |
| rec_stack.remove(node) | |
| return False | |
| for node in graph: | |
| if node not in visited: | |
| if dfs(node): | |
| return True | |
| return False | |
| """, | |
| "explanation_keywords": ["recursion stack", "rec_stack", "back edge", "visited", "false positive", "path"], | |
| "test_cases": [ | |
| {"input": {"A": ["B"], "B": ["C"], "C": ["A"]}, "expected": True}, | |
| {"input": {"A": ["B"], "B": ["C"], "C": []}, "expected": False}, | |
| {"input": {"A": ["B", "C"], "B": ["D"], "C": ["D"], "D": []}, "expected": False}, | |
| ], | |
| "test_cases_description": "Cycle detection in directed graph using recursion stack", | |
| }, | |
| { | |
| "task_id": "hard_006", | |
| "domain": "dynamic programming", | |
| "instructions": ( | |
| "The function computes the maximum subarray sum (Kadane's algorithm). " | |
| "Fix it AND explain what was wrong. " | |
| "Explanation must mention: initialization, negative numbers, current_sum, or reset." | |
| ), | |
| "buggy_code": """\ | |
| def max_subarray(nums): | |
| max_sum = 0 | |
| current_sum = 0 | |
| for n in nums: | |
| current_sum = max(n, current_sum + n) | |
| max_sum = max(max_sum, current_sum) | |
| return max_sum | |
| """, | |
| "fixed_code": """\ | |
| def max_subarray(nums): | |
| max_sum = nums[0] | |
| current_sum = nums[0] | |
| for n in nums[1:]: | |
| current_sum = max(n, current_sum + n) | |
| max_sum = max(max_sum, current_sum) | |
| return max_sum | |
| """, | |
| "explanation_keywords": ["initialization", "negative", "nums[0]", "all negative", "zero", "initial"], | |
| "test_cases": [ | |
| {"input": [-2, 1, -3, 4, -1, 2, 1, -5, 4], "expected": 6}, | |
| {"input": [-1, -2, -3, -4], "expected": -1}, | |
| {"input": [1], "expected": 1}, | |
| ], | |
| "test_cases_description": "Kadane's algorithm handles all-negative arrays", | |
| }, | |
| { | |
| "task_id": "hard_007", | |
| "domain": "string algorithm", | |
| "instructions": ( | |
| "The function checks if a string has balanced brackets. " | |
| "Fix it AND explain what was wrong. " | |
| "Explanation must mention: stack, matching, empty stack, or closing bracket." | |
| ), | |
| "buggy_code": """\ | |
| def is_balanced(s): | |
| stack = [] | |
| matching = {')': '(', ']': '[', '}': '{'} | |
| for ch in s: | |
| if ch in '([{': | |
| stack.append(ch) | |
| elif ch in ')]}': | |
| if stack and stack[-1] == matching[ch]: | |
| stack.pop() | |
| return len(stack) == 0 | |
| """, | |
| "fixed_code": """\ | |
| def is_balanced(s): | |
| stack = [] | |
| matching = {')': '(', ']': '[', '}': '{'} | |
| for ch in s: | |
| if ch in '([{': | |
| stack.append(ch) | |
| elif ch in ')]}': | |
| if not stack or stack[-1] != matching[ch]: | |
| return False | |
| stack.pop() | |
| return len(stack) == 0 | |
| """, | |
| "explanation_keywords": ["stack", "empty stack", "mismatch", "not stack", "early return", "closing"], | |
| "test_cases": [ | |
| {"input": "([{}])", "expected": True}, | |
| {"input": "([)]", "expected": False}, | |
| {"input": "]", "expected": False}, | |
| ], | |
| "test_cases_description": "Balanced brackets: early return False on mismatch or empty stack", | |
| }, | |
| { | |
| "task_id": "hard_008", | |
| "domain": "dynamic programming", | |
| "instructions": ( | |
| "The function computes the number of ways to climb n stairs (1 or 2 steps at a time). " | |
| "Fix it AND explain what was wrong. " | |
| "Explanation must mention: base case, dp, index, or off-by-one." | |
| ), | |
| "buggy_code": """\ | |
| def climb_stairs(n): | |
| if n <= 0: | |
| return 0 | |
| dp = [0] * (n + 1) | |
| dp[0] = 1 | |
| dp[1] = 1 | |
| for i in range(3, n + 1): | |
| dp[i] = dp[i - 1] + dp[i - 2] | |
| return dp[n] | |
| """, | |
| "fixed_code": """\ | |
| def climb_stairs(n): | |
| if n <= 0: | |
| return 0 | |
| dp = [0] * (n + 1) | |
| dp[0] = 1 | |
| dp[1] = 1 | |
| for i in range(2, n + 1): | |
| dp[i] = dp[i - 1] + dp[i - 2] | |
| return dp[n] | |
| """, | |
| "explanation_keywords": ["range", "starts at 3", "range(2", "off-by-one", "dp[2]", "skipped"], | |
| "test_cases": [ | |
| {"input": 2, "expected": 2}, | |
| {"input": 3, "expected": 3}, | |
| {"input": 5, "expected": 8}, | |
| ], | |
| "test_cases_description": "Climb stairs DP: loop starts at range(2, ...) not range(3, ...)", | |
| }, | |
| { | |
| "task_id": "hard_009", | |
| "domain": "data processing", | |
| "instructions": ( | |
| "The function implements quicksort. " | |
| "Fix it AND explain what was wrong. " | |
| "Explanation must mention: pivot, partition, recursion, or base case." | |
| ), | |
| "buggy_code": """\ | |
| def quicksort(arr): | |
| if len(arr) <= 1: | |
| return arr | |
| pivot = arr[0] | |
| left = [x for x in arr if x < pivot] | |
| right = [x for x in arr if x > pivot] | |
| return quicksort(left) + [pivot] + quicksort(right) | |
| """, | |
| "fixed_code": """\ | |
| def quicksort(arr): | |
| if len(arr) <= 1: | |
| return arr | |
| pivot = arr[0] | |
| left = [x for x in arr[1:] if x <= pivot] | |
| right = [x for x in arr[1:] if x > pivot] | |
| return quicksort(left) + [pivot] + quicksort(right) | |
| """, | |
| "explanation_keywords": ["duplicate", "arr[1:]", "pivot included", "equal", "lost", "missing"], | |
| "test_cases": [ | |
| {"input": [3, 6, 8, 10, 1, 2, 1], "expected": [1, 1, 2, 3, 6, 8, 10]}, | |
| {"input": [5, 5, 5], "expected": [5, 5, 5]}, | |
| {"input": [1], "expected": [1]}, | |
| ], | |
| "test_cases_description": "Quicksort handles duplicates: arr[1:] and x <= pivot", | |
| }, | |
| { | |
| "task_id": "hard_010", | |
| "domain": "graph algorithm", | |
| "instructions": ( | |
| "The function finds the shortest path length in an unweighted graph using BFS. " | |
| "Fix it AND explain what was wrong. " | |
| "Explanation must mention: visited, queue, infinite loop, or distance tracking." | |
| ), | |
| "buggy_code": """\ | |
| from collections import deque | |
| def bfs_shortest_path(graph, start, end): | |
| queue = deque([(start, 0)]) | |
| while queue: | |
| node, dist = queue.popleft() | |
| if node == end: | |
| return dist | |
| for neighbor in graph.get(node, []): | |
| queue.append((neighbor, dist + 1)) | |
| return -1 | |
| """, | |
| "fixed_code": """\ | |
| from collections import deque | |
| def bfs_shortest_path(graph, start, end): | |
| visited = set([start]) | |
| queue = deque([(start, 0)]) | |
| while queue: | |
| node, dist = queue.popleft() | |
| if node == end: | |
| return dist | |
| for neighbor in graph.get(node, []): | |
| if neighbor not in visited: | |
| visited.add(neighbor) | |
| queue.append((neighbor, dist + 1)) | |
| return -1 | |
| """, | |
| "explanation_keywords": ["visited", "infinite loop", "revisit", "cycle", "set", "already visited"], | |
| "test_cases": [ | |
| {"input": [{"A": ["B", "C"], "B": ["D"], "C": ["D"], "D": []}, "A", "D"], "expected": 2}, | |
| {"input": [{"A": ["B"], "B": ["A"]}, "A", "B"], "expected": 1}, | |
| {"input": [{"A": ["B"]}, "A", "C"], "expected": -1}, | |
| ], | |
| "test_cases_description": "BFS shortest path with visited set to prevent revisiting", | |
| }, | |
| { | |
| "task_id": "hard_011", | |
| "domain": "dynamic programming", | |
| "instructions": ( | |
| "The function computes the 0/1 knapsack maximum value. " | |
| "Fix it AND explain what was wrong. " | |
| "Explanation must mention: capacity, dp table, iteration order, or overwrite." | |
| ), | |
| "buggy_code": """\ | |
| def knapsack(weights, values, capacity): | |
| n = len(weights) | |
| dp = [0] * (capacity + 1) | |
| for i in range(n): | |
| for w in range(weights[i], capacity + 1): | |
| dp[w] = max(dp[w], dp[w - weights[i]] + values[i]) | |
| return dp[capacity] | |
| """, | |
| "fixed_code": """\ | |
| def knapsack(weights, values, capacity): | |
| n = len(weights) | |
| dp = [0] * (capacity + 1) | |
| for i in range(n): | |
| for w in range(capacity, weights[i] - 1, -1): | |
| dp[w] = max(dp[w], dp[w - weights[i]] + values[i]) | |
| return dp[capacity] | |
| """, | |
| "explanation_keywords": ["reverse", "backward", "overwrite", "0/1", "unbounded", "iteration order", "right to left"], | |
| "test_cases": [ | |
| {"input": [[2, 3, 4, 5], [3, 4, 5, 6], 5], "expected": 7}, | |
| {"input": [[1, 2, 3], [6, 10, 12], 5], "expected": 22}, | |
| {"input": [[5], [10], 3], "expected": 0}, | |
| ], | |
| "test_cases_description": "0/1 Knapsack: inner loop must go backward to avoid using item twice", | |
| }, | |
| { | |
| "task_id": "hard_012", | |
| "domain": "string algorithm", | |
| "instructions": ( | |
| "The function finds the length of the longest substring without repeating characters. " | |
| "Fix it AND explain what was wrong. " | |
| "Explanation must mention: window, pointer, index, or update." | |
| ), | |
| "buggy_code": """\ | |
| def length_of_longest_substring(s): | |
| char_index = {} | |
| left = 0 | |
| max_len = 0 | |
| for right, ch in enumerate(s): | |
| if ch in char_index: | |
| left = char_index[ch] + 1 | |
| char_index[ch] = right | |
| max_len = max(max_len, right - left + 1) | |
| return max_len | |
| """, | |
| "fixed_code": """\ | |
| def length_of_longest_substring(s): | |
| char_index = {} | |
| left = 0 | |
| max_len = 0 | |
| for right, ch in enumerate(s): | |
| if ch in char_index and char_index[ch] >= left: | |
| left = char_index[ch] + 1 | |
| char_index[ch] = right | |
| max_len = max(max_len, right - left + 1) | |
| return max_len | |
| """, | |
| "explanation_keywords": ["left pointer", "stale", "char_index[ch] >= left", "window", "shrink", "old index"], | |
| "test_cases": [ | |
| {"input": "abcabcbb", "expected": 3}, | |
| {"input": "bbbbb", "expected": 1}, | |
| {"input": "pwwkew", "expected": 3}, | |
| ], | |
| "test_cases_description": "Longest substring without repeating: only update left if char is within current window", | |
| }, | |
| { | |
| "task_id": "hard_013", | |
| "domain": "data processing", | |
| "instructions": ( | |
| "The function merges overlapping intervals. " | |
| "Fix it AND explain what was wrong. " | |
| "Explanation must mention: sort, overlap, merge condition, or end index." | |
| ), | |
| "buggy_code": """\ | |
| def merge_intervals(intervals): | |
| if not intervals: | |
| return [] | |
| intervals.sort(key=lambda x: x[0]) | |
| merged = [intervals[0]] | |
| for start, end in intervals[1:]: | |
| if start <= merged[-1][0]: | |
| merged[-1][1] = max(merged[-1][1], end) | |
| else: | |
| merged.append([start, end]) | |
| return merged | |
| """, | |
| "fixed_code": """\ | |
| def merge_intervals(intervals): | |
| if not intervals: | |
| return [] | |
| intervals.sort(key=lambda x: x[0]) | |
| merged = [intervals[0]] | |
| for start, end in intervals[1:]: | |
| if start <= merged[-1][1]: | |
| merged[-1][1] = max(merged[-1][1], end) | |
| else: | |
| merged.append([start, end]) | |
| return merged | |
| """, | |
| "explanation_keywords": ["merged[-1][1]", "end", "start", "overlap", "last interval", "index 1 vs 0"], | |
| "test_cases": [ | |
| {"input": [[[1, 3], [2, 6], [8, 10]]], "expected": [[1, 6], [8, 10]]}, | |
| {"input": [[[1, 4], [4, 5]]], "expected": [[1, 5]]}, | |
| {"input": [[[1, 2]]], "expected": [[1, 2]]}, | |
| ], | |
| "test_cases_description": "Merge intervals: compare start with merged[-1][1] (end), not [0] (start)", | |
| }, | |
| { | |
| "task_id": "hard_014", | |
| "domain": "math", | |
| "instructions": ( | |
| "The function does integer square root (floor) without using sqrt(). " | |
| "Fix it AND explain what was wrong. " | |
| "Explanation must mention: binary search, convergence, mid, or boundary." | |
| ), | |
| "buggy_code": """\ | |
| def integer_sqrt(n): | |
| if n < 2: | |
| return n | |
| low, high = 1, n | |
| while low <= high: | |
| mid = (low + high) // 2 | |
| if mid * mid == n: | |
| return mid | |
| elif mid * mid < n: | |
| low = mid + 1 | |
| else: | |
| high = mid - 1 | |
| return low | |
| """, | |
| "fixed_code": """\ | |
| def integer_sqrt(n): | |
| if n < 2: | |
| return n | |
| low, high = 1, n // 2 | |
| while low <= high: | |
| mid = (low + high) // 2 | |
| if mid * mid == n: | |
| return mid | |
| elif mid * mid < n: | |
| low = mid + 1 | |
| else: | |
| high = mid - 1 | |
| return high | |
| """, | |
| "explanation_keywords": ["high", "n // 2", "return high", "return low", "floor", "boundary", "last valid"], | |
| "test_cases": [ | |
| {"input": 16, "expected": 4}, | |
| {"input": 8, "expected": 2}, | |
| {"input": 1, "expected": 1}, | |
| ], | |
| "test_cases_description": "Integer square root: high=n//2, return high (floor result)", | |
| }, | |
| { | |
| "task_id": "hard_015", | |
| "domain": "string algorithm", | |
| "instructions": ( | |
| "The function implements the Z-algorithm to count pattern occurrences in text. " | |
| "Fix it AND explain what was wrong. " | |
| "Explanation must mention: concatenation, Z-array, separator, or index offset." | |
| ), | |
| "buggy_code": """\ | |
| def count_occurrences(text, pattern): | |
| concat = pattern + text | |
| n = len(concat) | |
| z = [0] * n | |
| l, r = 0, 0 | |
| for i in range(1, n): | |
| if i < r: | |
| z[i] = min(r - i, z[i - l]) | |
| while i + z[i] < n and concat[z[i]] == concat[i + z[i]]: | |
| z[i] += 1 | |
| if i + z[i] > r: | |
| l, r = i, i + z[i] | |
| return sum(1 for i in range(len(pattern), n) if z[i] == len(pattern)) | |
| """, | |
| "fixed_code": """\ | |
| def count_occurrences(text, pattern): | |
| concat = pattern + '#' + text | |
| n = len(concat) | |
| z = [0] * n | |
| l, r = 0, 0 | |
| for i in range(1, n): | |
| if i < r: | |
| z[i] = min(r - i, z[i - l]) | |
| while i + z[i] < n and concat[z[i]] == concat[i + z[i]]: | |
| z[i] += 1 | |
| if i + z[i] > r: | |
| l, r = i, i + z[i] | |
| p_len = len(pattern) | |
| return sum(1 for i in range(p_len + 1, n) if z[i] == p_len) | |
| """, | |
| "explanation_keywords": ["separator", "#", "without separator", "bleed", "p_len + 1", "offset", "boundary"], | |
| "test_cases": [ | |
| {"input": ["aabxaabaab", "aab"], "expected": 3}, | |
| {"input": ["hello world", "world"], "expected": 1}, | |
| {"input": ["aaaa", "aa"], "expected": 3}, | |
| ], | |
| "test_cases_description": "Z-algorithm with '#' separator and corrected offset p_len+1", | |
| }, | |
| ] | |
| def get_random_hard_task() -> dict: | |
| return random.choice(HARD_TASKS).copy() | |
| def get_task_by_id(task_id: str) -> dict: | |
| for t in HARD_TASKS: | |
| if t["task_id"] == task_id: | |
| return t.copy() | |
| return random.choice(HARD_TASKS).copy() | |