sakthivarshans commited on
Commit
5c6ee3a
·
1 Parent(s): 1f677a3

Fix grader scores to be strictly in (0, 1)

Browse files
__pycache__/models.cpython-311.pyc CHANGED
Binary files a/__pycache__/models.cpython-311.pyc and b/__pycache__/models.cpython-311.pyc differ
 
database.db ADDED
File without changes
graders.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import sys
3
  import os
4
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
@@ -9,145 +8,83 @@ from tasks import get_task
9
 
10
 
11
  _GRADER_TASKS = {
12
-
13
-
14
  "easy": [
15
-
16
- ("easy_001",
17
- "SELECT id, name, department, salary FROM employees WHERE salary > 70000 ORDER BY salary DESC"),
18
-
19
- ("easy_002",
20
- "SELECT category, COUNT(*) as count FROM products GROUP BY category"),
21
-
22
- ("easy_003",
23
- "SELECT name, price FROM products ORDER BY price DESC LIMIT 5"),
24
-
25
- ("easy_005",
26
- "SELECT SUM(price * quantity) as total_revenue FROM orders"),
27
-
28
- ("easy_007",
29
- "SELECT department, ROUND(AVG(salary),2) as avg_salary FROM employees GROUP BY department ORDER BY avg_salary DESC"),
30
  ],
31
-
32
-
33
  "medium": [
34
-
35
- ("medium_001",
36
- "SELECT c.name, COUNT(o.order_id) as order_count FROM customers c JOIN orders o ON c.customer_id=o.customer_id GROUP BY c.customer_id, c.name HAVING COUNT(o.order_id) > 1 ORDER BY order_count DESC"),
37
-
38
- ("medium_004",
39
- "SELECT d.dept_name, ROUND(AVG(e.salary), 2) as avg_salary FROM departments d JOIN employees e ON d.dept_id = e.dept_id GROUP BY d.dept_id, d.dept_name HAVING AVG(e.salary) > 80000 ORDER BY avg_salary DESC"),
40
-
41
- ("medium_007",
42
- "SELECT e.name, e.salary FROM employees e WHERE e.salary > (SELECT AVG(salary) FROM employees)"),
43
-
44
- ("medium_009",
45
- "SELECT d.dept_name, SUM(e.salary) as total_salary, SUM(e.salary) * 100.0 / (SELECT SUM(salary) FROM employees) as salary_pct FROM employees e JOIN departments d ON e.dept_id = d.dept_id GROUP BY d.dept_id, d.dept_name ORDER BY total_salary DESC"),
46
-
47
- ("medium_013",
48
- "SELECT MAX(salary) as second_highest_salary FROM employees WHERE salary < (SELECT MAX(salary) FROM employees)"),
49
  ],
50
-
51
-
52
  "hard": [
53
-
54
- ("hard_001",
55
- "SELECT c.name, c.city, SUM(o.amount) as total_amount FROM customers c JOIN orders o ON c.customer_id=o.customer_id GROUP BY c.customer_id, c.name, c.city ORDER BY c.city, total_amount DESC"),
56
-
57
- ("hard_003",
58
- "SELECT o.category, COUNT(r.return_id) * 100.0 / COUNT(o.order_id) as return_rate FROM orders o LEFT JOIN returns r ON o.order_id = r.order_id GROUP BY o.category ORDER BY return_rate DESC"),
59
-
60
- ("hard_006",
61
- "SELECT c.name FROM customers c JOIN orders o ON c.customer_id=o.customer_id WHERE o.category='Technology' GROUP BY c.customer_id, c.name"),
62
-
63
- ("hard_010",
64
- "SELECT o.category, SUM(o.amount) as total_revenue, AVG(o.amount) as avg_order_size, COUNT(DISTINCT o.customer_id) as unique_customers FROM orders o GROUP BY o.category ORDER BY total_revenue DESC"),
65
-
66
- ("hard_007",
67
- "SELECT c.name, o.order_date, o.amount FROM customers c JOIN orders o ON c.customer_id=o.customer_id ORDER BY c.name, o.order_date"),
68
  ],
69
-
70
-
71
  "expert": [
72
-
73
- ("expert_001",
74
- "SELECT a.holder_name, SUBSTR(t.txn_date,1,7) as month, ROUND(SUM(t.amount),2) as net_flow FROM accounts a JOIN transactions t ON a.account_id=t.account_id WHERE t.txn_date LIKE '2024%' GROUP BY a.account_id, a.holder_name, month ORDER BY a.holder_name, month"),
75
-
76
- ("expert_003",
77
- "SELECT a.holder_name, t.category, SUM(t.amount) as spend FROM accounts a JOIN transactions t ON a.account_id=t.account_id WHERE t.txn_type='debit' GROUP BY a.account_id, a.holder_name, t.category ORDER BY spend DESC"),
78
-
79
- ("expert_004",
80
- "SELECT a.holder_name, l.loan_type, l.principal, l.interest_rate, ROUND(l.principal * l.interest_rate / 100, 2) as annual_interest_cost FROM accounts a JOIN loans l ON a.account_id = l.account_id WHERE l.status = 'active' ORDER BY annual_interest_cost DESC"),
81
-
82
- ("expert_006",
83
- "SELECT a.holder_name FROM accounts a JOIN transactions t ON a.account_id=t.account_id WHERE t.category='salary' AND t.txn_type='credit' GROUP BY a.account_id HAVING COUNT(DISTINCT SUBSTR(t.txn_date,1,7)) >= 2"),
84
-
85
- ("expert_007",
86
- "SELECT a.holder_name, ROUND((SUM(CASE WHEN t.txn_type='credit' THEN t.amount ELSE 0 END) - SUM(CASE WHEN t.txn_type='debit' THEN t.amount ELSE 0 END)) * 100.0 / SUM(CASE WHEN t.txn_type='credit' THEN t.amount ELSE 0 END), 1) as savings_rate FROM accounts a JOIN transactions t ON a.account_id=t.account_id GROUP BY a.account_id, a.holder_name ORDER BY savings_rate DESC"),
87
  ],
88
  }
89
 
90
 
91
- def _run_task(task_id: str, sql: str) -> float:
92
-
93
  env = SQLArenaEnvironment()
94
  task = get_task(task_id)
95
  if task is None:
96
- return 0.4
97
  env.reset(task_id=task_id)
98
  obs = env.step(SQLArenaAction(sql=sql, query_type="submit"))
99
  reward = float(obs.reward) if obs.reward is not None else 0.0
100
  return reward
101
 
102
 
103
- def _grade_tier(tier: str) -> float:
104
-
105
  tasks = _GRADER_TASKS[tier]
106
  scores = []
107
  for task_id, sql in tasks:
108
  score = _run_task(task_id, sql)
109
  scores.append(score)
110
- print(f" {task_id}: {score:.4f}")
111
-
112
  mean = sum(scores) / len(scores)
113
-
114
-
115
  mean = max(0.01, min(0.99, mean))
116
  mean = round(mean, 4)
117
- print(f" {tier} score: {mean:.4f}")
118
  return mean
119
 
120
 
121
- def grade_easy() -> float:
122
-
123
- print("Running easy grader...")
124
  return _grade_tier("easy")
125
 
126
 
127
- def grade_medium() -> float:
128
-
129
- print("Running medium grader...")
130
  return _grade_tier("medium")
131
 
132
 
133
- def grade_hard() -> float:
134
-
135
- print("Running hard grader...")
136
  return _grade_tier("hard")
137
 
138
 
139
- def grade_expert() -> float:
140
-
141
- print("Running expert grader...")
142
  return _grade_tier("expert")
143
 
144
 
145
- def run_all_graders() -> dict:
146
-
147
  results = {
148
- "easy": grade_easy(),
149
  "medium": grade_medium(),
150
- "hard": grade_hard(),
151
  "expert": grade_expert(),
152
  }
153
  results["overall"] = round(
@@ -167,4 +104,4 @@ if __name__ == "__main__":
167
  print("=" * 50)
168
  for k, v in results.items():
169
  bar = "█" * int(v * 20)
170
- print(f" {k:10s}: {v:.4f} {bar}")
 
 
1
  import sys
2
  import os
3
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
 
8
 
9
 
10
  _GRADER_TASKS = {
 
 
11
  "easy": [
12
+ ("easy_001", "SELECT id, name, department, salary FROM employees WHERE salary > 70000 ORDER BY salary DESC"),
13
+ ("easy_002", "SELECT category, COUNT(*) as count FROM products GROUP BY category"),
14
+ ("easy_003", "SELECT name, price FROM products ORDER BY price DESC LIMIT 5"),
15
+ ("easy_005", "SELECT SUM(price * quantity) as total_revenue FROM orders"),
16
+ ("easy_007", "SELECT department, ROUND(AVG(salary),2) as avg_salary FROM employees GROUP BY department ORDER BY avg_salary DESC"),
 
 
 
 
 
 
 
 
 
 
17
  ],
 
 
18
  "medium": [
19
+ ("medium_001", "SELECT c.name, COUNT(o.order_id) as order_count FROM customers c JOIN orders o ON c.customer_id=o.customer_id GROUP BY c.customer_id, c.name HAVING COUNT(o.order_id) > 1 ORDER BY order_count DESC"),
20
+ ("medium_004", "SELECT d.dept_name, ROUND(AVG(e.salary), 2) as avg_salary FROM departments d JOIN employees e ON d.dept_id = e.dept_id GROUP BY d.dept_id, d.dept_name HAVING AVG(e.salary) > 80000 ORDER BY avg_salary DESC"),
21
+ ("medium_007", "SELECT e.name, e.salary FROM employees e WHERE e.salary > (SELECT AVG(salary) FROM employees)"),
22
+ ("medium_009", "SELECT d.dept_name, SUM(e.salary) as total_salary, SUM(e.salary) * 100.0 / (SELECT SUM(salary) FROM employees) as salary_pct FROM employees e JOIN departments d ON e.dept_id = d.dept_id GROUP BY d.dept_id, d.dept_name ORDER BY total_salary DESC"),
23
+ ("medium_013", "SELECT MAX(salary) as second_highest_salary FROM employees WHERE salary < (SELECT MAX(salary) FROM employees)"),
 
 
 
 
 
 
 
 
 
 
24
  ],
 
 
25
  "hard": [
26
+ ("hard_001", "SELECT c.name, c.city, SUM(o.amount) as total_amount FROM customers c JOIN orders o ON c.customer_id=o.customer_id GROUP BY c.customer_id, c.name, c.city ORDER BY c.city, total_amount DESC"),
27
+ ("hard_003", "SELECT o.category, COUNT(r.return_id) * 100.0 / COUNT(o.order_id) as return_rate FROM orders o LEFT JOIN returns r ON o.order_id = r.order_id GROUP BY o.category ORDER BY return_rate DESC"),
28
+ ("hard_006", "SELECT c.name FROM customers c JOIN orders o ON c.customer_id=o.customer_id WHERE o.category='Technology' GROUP BY c.customer_id, c.name"),
29
+ ("hard_010", "SELECT o.category, SUM(o.amount) as total_revenue, AVG(o.amount) as avg_order_size, COUNT(DISTINCT o.customer_id) as unique_customers FROM orders o GROUP BY o.category ORDER BY total_revenue DESC"),
30
+ ("hard_007", "SELECT c.name, o.order_date, o.amount FROM customers c JOIN orders o ON c.customer_id=o.customer_id ORDER BY c.name, o.order_date"),
 
 
 
 
 
 
 
 
 
 
31
  ],
 
 
32
  "expert": [
33
+ ("expert_001", "SELECT a.holder_name, SUBSTR(t.txn_date,1,7) as month, ROUND(SUM(t.amount),2) as net_flow FROM accounts a JOIN transactions t ON a.account_id=t.account_id WHERE t.txn_date LIKE '2024%' GROUP BY a.account_id, a.holder_name, month ORDER BY a.holder_name, month"),
34
+ ("expert_003", "SELECT a.holder_name, t.category, SUM(t.amount) as spend FROM accounts a JOIN transactions t ON a.account_id=t.account_id WHERE t.txn_type='debit' GROUP BY a.account_id, a.holder_name, t.category ORDER BY spend DESC"),
35
+ ("expert_004", "SELECT a.holder_name, l.loan_type, l.principal, l.interest_rate, ROUND(l.principal * l.interest_rate / 100, 2) as annual_interest_cost FROM accounts a JOIN loans l ON a.account_id = l.account_id WHERE l.status = 'active' ORDER BY annual_interest_cost DESC"),
36
+ ("expert_006", "SELECT a.holder_name FROM accounts a JOIN transactions t ON a.account_id=t.account_id WHERE t.category='salary' AND t.txn_type='credit' GROUP BY a.account_id HAVING COUNT(DISTINCT SUBSTR(t.txn_date,1,7)) >= 2"),
37
+ ("expert_007", "SELECT a.holder_name, ROUND((SUM(CASE WHEN t.txn_type='credit' THEN t.amount ELSE 0 END) - SUM(CASE WHEN t.txn_type='debit' THEN t.amount ELSE 0 END)) * 100.0 / SUM(CASE WHEN t.txn_type='credit' THEN t.amount ELSE 0 END), 1) as savings_rate FROM accounts a JOIN transactions t ON a.account_id=t.account_id GROUP BY a.account_id, a.holder_name ORDER BY savings_rate DESC"),
 
 
 
 
 
 
 
 
 
 
38
  ],
39
  }
40
 
41
 
42
+ def _run_task(task_id, sql):
 
43
  env = SQLArenaEnvironment()
44
  task = get_task(task_id)
45
  if task is None:
46
+ return 0.4
47
  env.reset(task_id=task_id)
48
  obs = env.step(SQLArenaAction(sql=sql, query_type="submit"))
49
  reward = float(obs.reward) if obs.reward is not None else 0.0
50
  return reward
51
 
52
 
53
+ def _grade_tier(tier):
 
54
  tasks = _GRADER_TASKS[tier]
55
  scores = []
56
  for task_id, sql in tasks:
57
  score = _run_task(task_id, sql)
58
  scores.append(score)
59
+ print(f"{task_id}: {score:.4f}")
 
60
  mean = sum(scores) / len(scores)
 
 
61
  mean = max(0.01, min(0.99, mean))
62
  mean = round(mean, 4)
63
+ print(f"{tier} score: {mean:.4f}")
64
  return mean
65
 
66
 
67
+ def grade_easy(env=None, action=None):
 
 
68
  return _grade_tier("easy")
69
 
70
 
71
+ def grade_medium(env=None, action=None):
 
 
72
  return _grade_tier("medium")
73
 
74
 
75
+ def grade_hard(env=None, action=None):
 
 
76
  return _grade_tier("hard")
77
 
78
 
79
+ def grade_expert(env=None, action=None):
 
 
80
  return _grade_tier("expert")
81
 
82
 
83
+ def run_all_graders():
 
84
  results = {
85
+ "easy": grade_easy(),
86
  "medium": grade_medium(),
87
+ "hard": grade_hard(),
88
  "expert": grade_expert(),
89
  }
90
  results["overall"] = round(
 
104
  print("=" * 50)
105
  for k, v in results.items():
106
  bar = "█" * int(v * 20)
107
+ print(f"{k:10s}: {v:.4f} {bar}")
openenv.yaml CHANGED
@@ -4,3 +4,20 @@ type: space
4
  runtime: fastapi
5
  app: server.app:app
6
  port: 8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  runtime: fastapi
5
  app: server.app:app
6
  port: 8000
7
+
8
+ graders:
9
+ - id: easy
10
+ entry: graders:grade_easy
11
+ description: "Easy SQL tasks grader"
12
+
13
+ - id: medium
14
+ entry: graders:grade_medium
15
+ description: "Medium SQL tasks grader"
16
+
17
+ - id: hard
18
+ entry: graders:grade_hard
19
+ description: "Hard SQL tasks grader"
20
+
21
+ - id: expert
22
+ entry: graders:grade_expert
23
+ description: "Expert SQL tasks grader"
server/__pycache__/__init__.cpython-311.pyc CHANGED
Binary files a/server/__pycache__/__init__.cpython-311.pyc and b/server/__pycache__/__init__.cpython-311.pyc differ
 
server/__pycache__/sql_arena_environment.cpython-311.pyc CHANGED
Binary files a/server/__pycache__/sql_arena_environment.cpython-311.pyc and b/server/__pycache__/sql_arena_environment.cpython-311.pyc differ