Spaces:
Sleeping
Sleeping
Commit ·
5c6ee3a
1
Parent(s): 1f677a3
Fix grader scores to be strictly in (0, 1)
Browse files
- __pycache__/models.cpython-311.pyc +0 -0
- database.db +0 -0
- graders.py +33 -96
- openenv.yaml +17 -0
- server/__pycache__/__init__.cpython-311.pyc +0 -0
- server/__pycache__/sql_arena_environment.cpython-311.pyc +0 -0
__pycache__/models.cpython-311.pyc
CHANGED
|
Binary files a/__pycache__/models.cpython-311.pyc and b/__pycache__/models.cpython-311.pyc differ
|
|
|
database.db
ADDED
|
File without changes
|
graders.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
|
| 2 |
import sys
|
| 3 |
import os
|
| 4 |
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
@@ -9,145 +8,83 @@ from tasks import get_task
|
|
| 9 |
|
| 10 |
|
| 11 |
_GRADER_TASKS = {
|
| 12 |
-
|
| 13 |
-
|
| 14 |
"easy": [
|
| 15 |
-
|
| 16 |
-
("
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
("
|
| 20 |
-
"SELECT category, COUNT(*) as count FROM products GROUP BY category"),
|
| 21 |
-
|
| 22 |
-
("easy_003",
|
| 23 |
-
"SELECT name, price FROM products ORDER BY price DESC LIMIT 5"),
|
| 24 |
-
|
| 25 |
-
("easy_005",
|
| 26 |
-
"SELECT SUM(price * quantity) as total_revenue FROM orders"),
|
| 27 |
-
|
| 28 |
-
("easy_007",
|
| 29 |
-
"SELECT department, ROUND(AVG(salary),2) as avg_salary FROM employees GROUP BY department ORDER BY avg_salary DESC"),
|
| 30 |
],
|
| 31 |
-
|
| 32 |
-
|
| 33 |
"medium": [
|
| 34 |
-
|
| 35 |
-
("
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
("
|
| 39 |
-
"SELECT d.dept_name, ROUND(AVG(e.salary), 2) as avg_salary FROM departments d JOIN employees e ON d.dept_id = e.dept_id GROUP BY d.dept_id, d.dept_name HAVING AVG(e.salary) > 80000 ORDER BY avg_salary DESC"),
|
| 40 |
-
|
| 41 |
-
("medium_007",
|
| 42 |
-
"SELECT e.name, e.salary FROM employees e WHERE e.salary > (SELECT AVG(salary) FROM employees)"),
|
| 43 |
-
|
| 44 |
-
("medium_009",
|
| 45 |
-
"SELECT d.dept_name, SUM(e.salary) as total_salary, SUM(e.salary) * 100.0 / (SELECT SUM(salary) FROM employees) as salary_pct FROM employees e JOIN departments d ON e.dept_id = d.dept_id GROUP BY d.dept_id, d.dept_name ORDER BY total_salary DESC"),
|
| 46 |
-
|
| 47 |
-
("medium_013",
|
| 48 |
-
"SELECT MAX(salary) as second_highest_salary FROM employees WHERE salary < (SELECT MAX(salary) FROM employees)"),
|
| 49 |
],
|
| 50 |
-
|
| 51 |
-
|
| 52 |
"hard": [
|
| 53 |
-
|
| 54 |
-
("
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
("
|
| 58 |
-
"SELECT o.category, COUNT(r.return_id) * 100.0 / COUNT(o.order_id) as return_rate FROM orders o LEFT JOIN returns r ON o.order_id = r.order_id GROUP BY o.category ORDER BY return_rate DESC"),
|
| 59 |
-
|
| 60 |
-
("hard_006",
|
| 61 |
-
"SELECT c.name FROM customers c JOIN orders o ON c.customer_id=o.customer_id WHERE o.category='Technology' GROUP BY c.customer_id, c.name"),
|
| 62 |
-
|
| 63 |
-
("hard_010",
|
| 64 |
-
"SELECT o.category, SUM(o.amount) as total_revenue, AVG(o.amount) as avg_order_size, COUNT(DISTINCT o.customer_id) as unique_customers FROM orders o GROUP BY o.category ORDER BY total_revenue DESC"),
|
| 65 |
-
|
| 66 |
-
("hard_007",
|
| 67 |
-
"SELECT c.name, o.order_date, o.amount FROM customers c JOIN orders o ON c.customer_id=o.customer_id ORDER BY c.name, o.order_date"),
|
| 68 |
],
|
| 69 |
-
|
| 70 |
-
|
| 71 |
"expert": [
|
| 72 |
-
|
| 73 |
-
("
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
("
|
| 77 |
-
"SELECT a.holder_name, t.category, SUM(t.amount) as spend FROM accounts a JOIN transactions t ON a.account_id=t.account_id WHERE t.txn_type='debit' GROUP BY a.account_id, a.holder_name, t.category ORDER BY spend DESC"),
|
| 78 |
-
|
| 79 |
-
("expert_004",
|
| 80 |
-
"SELECT a.holder_name, l.loan_type, l.principal, l.interest_rate, ROUND(l.principal * l.interest_rate / 100, 2) as annual_interest_cost FROM accounts a JOIN loans l ON a.account_id = l.account_id WHERE l.status = 'active' ORDER BY annual_interest_cost DESC"),
|
| 81 |
-
|
| 82 |
-
("expert_006",
|
| 83 |
-
"SELECT a.holder_name FROM accounts a JOIN transactions t ON a.account_id=t.account_id WHERE t.category='salary' AND t.txn_type='credit' GROUP BY a.account_id HAVING COUNT(DISTINCT SUBSTR(t.txn_date,1,7)) >= 2"),
|
| 84 |
-
|
| 85 |
-
("expert_007",
|
| 86 |
-
"SELECT a.holder_name, ROUND((SUM(CASE WHEN t.txn_type='credit' THEN t.amount ELSE 0 END) - SUM(CASE WHEN t.txn_type='debit' THEN t.amount ELSE 0 END)) * 100.0 / SUM(CASE WHEN t.txn_type='credit' THEN t.amount ELSE 0 END), 1) as savings_rate FROM accounts a JOIN transactions t ON a.account_id=t.account_id GROUP BY a.account_id, a.holder_name ORDER BY savings_rate DESC"),
|
| 87 |
],
|
| 88 |
}
|
| 89 |
|
| 90 |
|
| 91 |
-
def _run_task(task_id
|
| 92 |
-
|
| 93 |
env = SQLArenaEnvironment()
|
| 94 |
task = get_task(task_id)
|
| 95 |
if task is None:
|
| 96 |
-
return 0.4
|
| 97 |
env.reset(task_id=task_id)
|
| 98 |
obs = env.step(SQLArenaAction(sql=sql, query_type="submit"))
|
| 99 |
reward = float(obs.reward) if obs.reward is not None else 0.0
|
| 100 |
return reward
|
| 101 |
|
| 102 |
|
| 103 |
-
def _grade_tier(tier
|
| 104 |
-
|
| 105 |
tasks = _GRADER_TASKS[tier]
|
| 106 |
scores = []
|
| 107 |
for task_id, sql in tasks:
|
| 108 |
score = _run_task(task_id, sql)
|
| 109 |
scores.append(score)
|
| 110 |
-
print(f"
|
| 111 |
-
|
| 112 |
mean = sum(scores) / len(scores)
|
| 113 |
-
|
| 114 |
-
|
| 115 |
mean = max(0.01, min(0.99, mean))
|
| 116 |
mean = round(mean, 4)
|
| 117 |
-
print(f"
|
| 118 |
return mean
|
| 119 |
|
| 120 |
|
| 121 |
-
def grade_easy(
|
| 122 |
-
|
| 123 |
-
print("Running easy grader...")
|
| 124 |
return _grade_tier("easy")
|
| 125 |
|
| 126 |
|
| 127 |
-
def grade_medium(
|
| 128 |
-
|
| 129 |
-
print("Running medium grader...")
|
| 130 |
return _grade_tier("medium")
|
| 131 |
|
| 132 |
|
| 133 |
-
def grade_hard(
|
| 134 |
-
|
| 135 |
-
print("Running hard grader...")
|
| 136 |
return _grade_tier("hard")
|
| 137 |
|
| 138 |
|
| 139 |
-
def grade_expert(
|
| 140 |
-
|
| 141 |
-
print("Running expert grader...")
|
| 142 |
return _grade_tier("expert")
|
| 143 |
|
| 144 |
|
| 145 |
-
def run_all_graders()
|
| 146 |
-
|
| 147 |
results = {
|
| 148 |
-
"easy":
|
| 149 |
"medium": grade_medium(),
|
| 150 |
-
"hard":
|
| 151 |
"expert": grade_expert(),
|
| 152 |
}
|
| 153 |
results["overall"] = round(
|
|
@@ -167,4 +104,4 @@ if __name__ == "__main__":
|
|
| 167 |
print("=" * 50)
|
| 168 |
for k, v in results.items():
|
| 169 |
bar = "█" * int(v * 20)
|
| 170 |
-
print(f"
|
|
|
|
|
|
|
| 1 |
import sys
|
| 2 |
import os
|
| 3 |
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
_GRADER_TASKS = {
|
|
|
|
|
|
|
| 11 |
"easy": [
|
| 12 |
+
("easy_001", "SELECT id, name, department, salary FROM employees WHERE salary > 70000 ORDER BY salary DESC"),
|
| 13 |
+
("easy_002", "SELECT category, COUNT(*) as count FROM products GROUP BY category"),
|
| 14 |
+
("easy_003", "SELECT name, price FROM products ORDER BY price DESC LIMIT 5"),
|
| 15 |
+
("easy_005", "SELECT SUM(price * quantity) as total_revenue FROM orders"),
|
| 16 |
+
("easy_007", "SELECT department, ROUND(AVG(salary),2) as avg_salary FROM employees GROUP BY department ORDER BY avg_salary DESC"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
],
|
|
|
|
|
|
|
| 18 |
"medium": [
|
| 19 |
+
("medium_001", "SELECT c.name, COUNT(o.order_id) as order_count FROM customers c JOIN orders o ON c.customer_id=o.customer_id GROUP BY c.customer_id, c.name HAVING COUNT(o.order_id) > 1 ORDER BY order_count DESC"),
|
| 20 |
+
("medium_004", "SELECT d.dept_name, ROUND(AVG(e.salary), 2) as avg_salary FROM departments d JOIN employees e ON d.dept_id = e.dept_id GROUP BY d.dept_id, d.dept_name HAVING AVG(e.salary) > 80000 ORDER BY avg_salary DESC"),
|
| 21 |
+
("medium_007", "SELECT e.name, e.salary FROM employees e WHERE e.salary > (SELECT AVG(salary) FROM employees)"),
|
| 22 |
+
("medium_009", "SELECT d.dept_name, SUM(e.salary) as total_salary, SUM(e.salary) * 100.0 / (SELECT SUM(salary) FROM employees) as salary_pct FROM employees e JOIN departments d ON e.dept_id = d.dept_id GROUP BY d.dept_id, d.dept_name ORDER BY total_salary DESC"),
|
| 23 |
+
("medium_013", "SELECT MAX(salary) as second_highest_salary FROM employees WHERE salary < (SELECT MAX(salary) FROM employees)"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
],
|
|
|
|
|
|
|
| 25 |
"hard": [
|
| 26 |
+
("hard_001", "SELECT c.name, c.city, SUM(o.amount) as total_amount FROM customers c JOIN orders o ON c.customer_id=o.customer_id GROUP BY c.customer_id, c.name, c.city ORDER BY c.city, total_amount DESC"),
|
| 27 |
+
("hard_003", "SELECT o.category, COUNT(r.return_id) * 100.0 / COUNT(o.order_id) as return_rate FROM orders o LEFT JOIN returns r ON o.order_id = r.order_id GROUP BY o.category ORDER BY return_rate DESC"),
|
| 28 |
+
("hard_006", "SELECT c.name FROM customers c JOIN orders o ON c.customer_id=o.customer_id WHERE o.category='Technology' GROUP BY c.customer_id, c.name"),
|
| 29 |
+
("hard_010", "SELECT o.category, SUM(o.amount) as total_revenue, AVG(o.amount) as avg_order_size, COUNT(DISTINCT o.customer_id) as unique_customers FROM orders o GROUP BY o.category ORDER BY total_revenue DESC"),
|
| 30 |
+
("hard_007", "SELECT c.name, o.order_date, o.amount FROM customers c JOIN orders o ON c.customer_id=o.customer_id ORDER BY c.name, o.order_date"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
],
|
|
|
|
|
|
|
| 32 |
"expert": [
|
| 33 |
+
("expert_001", "SELECT a.holder_name, SUBSTR(t.txn_date,1,7) as month, ROUND(SUM(t.amount),2) as net_flow FROM accounts a JOIN transactions t ON a.account_id=t.account_id WHERE t.txn_date LIKE '2024%' GROUP BY a.account_id, a.holder_name, month ORDER BY a.holder_name, month"),
|
| 34 |
+
("expert_003", "SELECT a.holder_name, t.category, SUM(t.amount) as spend FROM accounts a JOIN transactions t ON a.account_id=t.account_id WHERE t.txn_type='debit' GROUP BY a.account_id, a.holder_name, t.category ORDER BY spend DESC"),
|
| 35 |
+
("expert_004", "SELECT a.holder_name, l.loan_type, l.principal, l.interest_rate, ROUND(l.principal * l.interest_rate / 100, 2) as annual_interest_cost FROM accounts a JOIN loans l ON a.account_id = l.account_id WHERE l.status = 'active' ORDER BY annual_interest_cost DESC"),
|
| 36 |
+
("expert_006", "SELECT a.holder_name FROM accounts a JOIN transactions t ON a.account_id=t.account_id WHERE t.category='salary' AND t.txn_type='credit' GROUP BY a.account_id HAVING COUNT(DISTINCT SUBSTR(t.txn_date,1,7)) >= 2"),
|
| 37 |
+
("expert_007", "SELECT a.holder_name, ROUND((SUM(CASE WHEN t.txn_type='credit' THEN t.amount ELSE 0 END) - SUM(CASE WHEN t.txn_type='debit' THEN t.amount ELSE 0 END)) * 100.0 / SUM(CASE WHEN t.txn_type='credit' THEN t.amount ELSE 0 END), 1) as savings_rate FROM accounts a JOIN transactions t ON a.account_id=t.account_id GROUP BY a.account_id, a.holder_name ORDER BY savings_rate DESC"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
],
|
| 39 |
}
|
| 40 |
|
| 41 |
|
| 42 |
+
def _run_task(task_id, sql):
|
|
|
|
| 43 |
env = SQLArenaEnvironment()
|
| 44 |
task = get_task(task_id)
|
| 45 |
if task is None:
|
| 46 |
+
return 0.4
|
| 47 |
env.reset(task_id=task_id)
|
| 48 |
obs = env.step(SQLArenaAction(sql=sql, query_type="submit"))
|
| 49 |
reward = float(obs.reward) if obs.reward is not None else 0.0
|
| 50 |
return reward
|
| 51 |
|
| 52 |
|
| 53 |
+
def _grade_tier(tier):
|
|
|
|
| 54 |
tasks = _GRADER_TASKS[tier]
|
| 55 |
scores = []
|
| 56 |
for task_id, sql in tasks:
|
| 57 |
score = _run_task(task_id, sql)
|
| 58 |
scores.append(score)
|
| 59 |
+
print(f"{task_id}: {score:.4f}")
|
|
|
|
| 60 |
mean = sum(scores) / len(scores)
|
|
|
|
|
|
|
| 61 |
mean = max(0.01, min(0.99, mean))
|
| 62 |
mean = round(mean, 4)
|
| 63 |
+
print(f"{tier} score: {mean:.4f}")
|
| 64 |
return mean
|
| 65 |
|
| 66 |
|
| 67 |
+
def grade_easy(env=None, action=None):
|
|
|
|
|
|
|
| 68 |
return _grade_tier("easy")
|
| 69 |
|
| 70 |
|
| 71 |
+
def grade_medium(env=None, action=None):
|
|
|
|
|
|
|
| 72 |
return _grade_tier("medium")
|
| 73 |
|
| 74 |
|
| 75 |
+
def grade_hard(env=None, action=None):
|
|
|
|
|
|
|
| 76 |
return _grade_tier("hard")
|
| 77 |
|
| 78 |
|
| 79 |
+
def grade_expert(env=None, action=None):
|
|
|
|
|
|
|
| 80 |
return _grade_tier("expert")
|
| 81 |
|
| 82 |
|
| 83 |
+
def run_all_graders():
|
|
|
|
| 84 |
results = {
|
| 85 |
+
"easy": grade_easy(),
|
| 86 |
"medium": grade_medium(),
|
| 87 |
+
"hard": grade_hard(),
|
| 88 |
"expert": grade_expert(),
|
| 89 |
}
|
| 90 |
results["overall"] = round(
|
|
|
|
| 104 |
print("=" * 50)
|
| 105 |
for k, v in results.items():
|
| 106 |
bar = "█" * int(v * 20)
|
| 107 |
+
print(f"{k:10s}: {v:.4f} {bar}")
|
openenv.yaml
CHANGED
|
@@ -4,3 +4,20 @@ type: space
|
|
| 4 |
runtime: fastapi
|
| 5 |
app: server.app:app
|
| 6 |
port: 8000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
runtime: fastapi
|
| 5 |
app: server.app:app
|
| 6 |
port: 8000
|
| 7 |
+
|
| 8 |
+
graders:
|
| 9 |
+
- id: easy
|
| 10 |
+
entry: graders:grade_easy
|
| 11 |
+
description: "Easy SQL tasks grader"
|
| 12 |
+
|
| 13 |
+
- id: medium
|
| 14 |
+
entry: graders:grade_medium
|
| 15 |
+
description: "Medium SQL tasks grader"
|
| 16 |
+
|
| 17 |
+
- id: hard
|
| 18 |
+
entry: graders:grade_hard
|
| 19 |
+
description: "Hard SQL tasks grader"
|
| 20 |
+
|
| 21 |
+
- id: expert
|
| 22 |
+
entry: graders:grade_expert
|
| 23 |
+
description: "Expert SQL tasks grader"
|
server/__pycache__/__init__.cpython-311.pyc
CHANGED
|
Binary files a/server/__pycache__/__init__.cpython-311.pyc and b/server/__pycache__/__init__.cpython-311.pyc differ
|
|
|
server/__pycache__/sql_arena_environment.cpython-311.pyc
CHANGED
|
Binary files a/server/__pycache__/sql_arena_environment.cpython-311.pyc and b/server/__pycache__/sql_arena_environment.cpython-311.pyc differ
|
|
|